From 64a0dd51e901132f962c812824ed50a59cec46e9 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 24 Jun 2026 10:21:45 -0400 Subject: [PATCH 01/13] [WIP] Implement JSON-LD expansion Signed-off-by: Juan Cruz Viotti --- .github/workflows/website-build.yml | 1 + .github/workflows/website-deploy.yml | 1 + CMakeLists.txt | 9 + config.cmake.in | 12 ++ src/core/jsonld/CMakeLists.txt | 14 ++ .../jsonld/include/sourcemeta/core/jsonld.h | 88 +++++++++ .../include/sourcemeta/core/jsonld_error.h | 51 ++++++ src/core/jsonld/jsonld.cc | 18 ++ test/jsonld/CMakeLists.txt | 20 +++ test/jsonld/jsonld_expand_test.cc | 13 ++ test/jsonld/jsonld_suite.cc | 170 ++++++++++++++++++ test/packaging/find_package/CMakeLists.txt | 1 + test/packaging/find_package/hello.cc | 1 + 13 files changed, 399 insertions(+) create mode 100644 src/core/jsonld/CMakeLists.txt create mode 100644 src/core/jsonld/include/sourcemeta/core/jsonld.h create mode 100644 src/core/jsonld/include/sourcemeta/core/jsonld_error.h create mode 100644 src/core/jsonld/jsonld.cc create mode 100644 test/jsonld/CMakeLists.txt create mode 100644 test/jsonld/jsonld_expand_test.cc create mode 100644 test/jsonld/jsonld_suite.cc diff --git a/.github/workflows/website-build.yml b/.github/workflows/website-build.yml index 884083204..ccbb87279 100644 --- a/.github/workflows/website-build.yml +++ b/.github/workflows/website-build.yml @@ -40,6 +40,7 @@ jobs: -DSOURCEMETA_CORE_JSON:BOOL=OFF -DSOURCEMETA_CORE_JSONL:BOOL=OFF -DSOURCEMETA_CORE_JSONPOINTER:BOOL=OFF + -DSOURCEMETA_CORE_JSONLD:BOOL=OFF -DSOURCEMETA_CORE_YAML:BOOL=OFF -DSOURCEMETA_CORE_JSONRPC:BOOL=OFF -DSOURCEMETA_CORE_MCP:BOOL=OFF diff --git a/.github/workflows/website-deploy.yml b/.github/workflows/website-deploy.yml index 013b71d0d..8c83c15d2 100644 --- a/.github/workflows/website-deploy.yml +++ b/.github/workflows/website-deploy.yml @@ -50,6 +50,7 @@ jobs: -DSOURCEMETA_CORE_JSON:BOOL=OFF -DSOURCEMETA_CORE_JSONL:BOOL=OFF -DSOURCEMETA_CORE_JSONPOINTER:BOOL=OFF + 
-DSOURCEMETA_CORE_JSONLD:BOOL=OFF -DSOURCEMETA_CORE_YAML:BOOL=OFF -DSOURCEMETA_CORE_JSONRPC:BOOL=OFF -DSOURCEMETA_CORE_MCP:BOOL=OFF diff --git a/CMakeLists.txt b/CMakeLists.txt index 22f9a9a78..e45099d99 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ option(SOURCEMETA_CORE_URI "Build the Sourcemeta Core URI library" ON) option(SOURCEMETA_CORE_URITEMPLATE "Build the Sourcemeta Core URI Template library" ON) option(SOURCEMETA_CORE_JSON "Build the Sourcemeta Core JSON library" ON) option(SOURCEMETA_CORE_JSONPOINTER "Build the Sourcemeta Core JSON Pointer library" ON) +option(SOURCEMETA_CORE_JSONLD "Build the Sourcemeta Core JSON-LD library" ON) option(SOURCEMETA_CORE_JSONL "Build the Sourcemeta Core JSONL library" ON) option(SOURCEMETA_CORE_YAML "Build the Sourcemeta Core YAML library" ON) option(SOURCEMETA_CORE_JSONRPC "Build the Sourcemeta Core JSON-RPC library" ON) @@ -171,6 +172,10 @@ if(SOURCEMETA_CORE_JSONPOINTER) add_subdirectory(src/core/jsonpointer) endif() +if(SOURCEMETA_CORE_JSONLD) + add_subdirectory(src/core/jsonld) +endif() + if(SOURCEMETA_CORE_GZIP) find_package(LibDeflate REQUIRED) add_subdirectory(src/core/gzip) @@ -338,6 +343,10 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/jsonpointer) endif() + if(SOURCEMETA_CORE_JSONLD) + add_subdirectory(test/jsonld) + endif() + if(SOURCEMETA_CORE_GZIP) add_subdirectory(test/gzip) endif() diff --git a/config.cmake.in b/config.cmake.in index 5867d812e..f192bb6de 100644 --- a/config.cmake.in +++ b/config.cmake.in @@ -24,6 +24,7 @@ if(NOT SOURCEMETA_CORE_COMPONENTS) list(APPEND SOURCEMETA_CORE_COMPONENTS json) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonl) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonpointer) + list(APPEND SOURCEMETA_CORE_COMPONENTS jsonld) list(APPEND SOURCEMETA_CORE_COMPONENTS yaml) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonrpc) list(APPEND SOURCEMETA_CORE_COMPONENTS mcp) @@ -136,6 +137,17 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) 
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") + elseif(component STREQUAL "jsonld") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_text.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_langtag.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonld.cmake") elseif(component STREQUAL "yaml") find_dependency(PCRE2 CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") diff --git a/src/core/jsonld/CMakeLists.txt b/src/core/jsonld/CMakeLists.txt new file mode 100644 index 000000000..9e31d2cd1 --- /dev/null +++ b/src/core/jsonld/CMakeLists.txt @@ -0,0 +1,14 @@ +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME jsonld + PRIVATE_HEADERS error.h + SOURCES jsonld.cc) + +if(SOURCEMETA_CORE_INSTALL) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME jsonld) +endif() + +target_link_libraries(sourcemeta_core_jsonld + PUBLIC sourcemeta::core::json) +target_link_libraries(sourcemeta_core_jsonld + PRIVATE sourcemeta::core::uri) +target_link_libraries(sourcemeta_core_jsonld + PRIVATE sourcemeta::core::langtag) diff --git a/src/core/jsonld/include/sourcemeta/core/jsonld.h b/src/core/jsonld/include/sourcemeta/core/jsonld.h new file mode 100644 index 000000000..c54ec3fdf --- /dev/null +++ b/src/core/jsonld/include/sourcemeta/core/jsonld.h @@ -0,0 +1,88 @@ +#ifndef SOURCEMETA_CORE_JSONLD_H_ +#define 
SOURCEMETA_CORE_JSONLD_H_ + +#ifndef SOURCEMETA_CORE_JSONLD_EXPORT +#include +#endif + +#include +#include + +#include // std::uint8_t +#include // std::function +#include // std::optional + +/// @defgroup jsonld JSON-LD +/// @brief A JSON-LD 1.1 processor, with support for the JSON-LD 1.0 processing +/// mode. +/// +/// This functionality is included as follows: +/// +/// ```cpp +/// #include +/// ``` + +namespace sourcemeta::core { + +/// @ingroup jsonld +/// A resolver callback for loading remote JSON-LD contexts referenced during +/// expansion. Given an absolute IRI, it returns the referenced document, or no +/// value if it cannot be resolved. +using JSONLDResolver = std::function(JSON::StringView)>; + +/// @ingroup jsonld +/// The JSON-LD processing mode +enum class JSONLDVersion : std::uint8_t { V1_0, V1_1 }; + +/// @ingroup jsonld +/// +/// Expand a JSON-LD document into its expanded form, resolving relative +/// references against the given base IRI and loading any remote context through +/// the given resolver. The result is always a JSON array. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// const auto document{sourcemeta::core::parse_json(R"({ +/// "@context": { "name": "https://schema.org/name" }, +/// "name": "Sourcemeta" +/// })")}; +/// const auto expanded{sourcemeta::core::jsonld_expand(document)}; +/// sourcemeta::core::prettify(expanded, std::cout); +/// std::cout << std::endl; +/// ``` +SOURCEMETA_CORE_JSONLD_EXPORT +auto jsonld_expand(const JSON &input, const JSON::StringView base_iri = "", + const JSONLDResolver &resolver = {}, + const JSONLDVersion version = JSONLDVersion::V1_1) -> JSON; + +/// @ingroup jsonld +/// +/// Expand a JSON-LD document, applying the given expansion context before the +/// document's own context, as if it had been prepended to the input. 
For +/// example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// const auto document{sourcemeta::core::parse_json( +/// R"({ "name": "Sourcemeta" })")}; +/// const auto context{sourcemeta::core::parse_json( +/// R"({ "name": "https://schema.org/name" })")}; +/// const auto expanded{sourcemeta::core::jsonld_expand(document, context)}; +/// sourcemeta::core::prettify(expanded, std::cout); +/// std::cout << std::endl; +/// ``` +SOURCEMETA_CORE_JSONLD_EXPORT +auto jsonld_expand(const JSON &input, const JSON &expand_context, + const JSON::StringView base_iri = "", + const JSONLDResolver &resolver = {}, + const JSONLDVersion version = JSONLDVersion::V1_1) -> JSON; + +} // namespace sourcemeta::core + +#endif diff --git a/src/core/jsonld/include/sourcemeta/core/jsonld_error.h b/src/core/jsonld/include/sourcemeta/core/jsonld_error.h new file mode 100644 index 000000000..e9b38ae5c --- /dev/null +++ b/src/core/jsonld/include/sourcemeta/core/jsonld_error.h @@ -0,0 +1,51 @@ +#ifndef SOURCEMETA_CORE_JSONLD_ERROR_H_ +#define SOURCEMETA_CORE_JSONLD_ERROR_H_ + +#ifndef SOURCEMETA_CORE_JSONLD_EXPORT +#include +#endif + +#include // std::exception +#include // std::string +#include // std::string_view + +namespace sourcemeta::core { + +// Exporting symbols that depends on the standard C++ library is considered +// safe. +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251 4275) +#endif + +/// @ingroup jsonld +/// An error that represents a JSON-LD processing failure. 
The message is one of +/// the error codes defined by the JSON-LD 1.1 API specification, such as +/// `invalid @id value` or `cyclic IRI mapping` +class SOURCEMETA_CORE_JSONLD_EXPORT JSONLDError : public std::exception { +public: + JSONLDError(const char *code) : code_{code} {} + JSONLDError(std::string code) = delete; + JSONLDError(std::string &&code) = delete; + JSONLDError(std::string_view code) = delete; + + [[nodiscard]] auto what() const noexcept -> const char * override { + return this->code_; + } + + /// Get the JSON-LD error code + [[nodiscard]] auto code() const noexcept -> const char * { + return this->code_; + } + +private: + const char *code_; +}; + +#if defined(_MSC_VER) +#pragma warning(default : 4251 4275) +#endif + +} // namespace sourcemeta::core + +#endif diff --git a/src/core/jsonld/jsonld.cc b/src/core/jsonld/jsonld.cc new file mode 100644 index 000000000..3ecb2f1b3 --- /dev/null +++ b/src/core/jsonld/jsonld.cc @@ -0,0 +1,18 @@ +#include + +namespace sourcemeta::core { + +auto jsonld_expand(const JSON &, const JSON::StringView, const JSONLDResolver &, + const JSONLDVersion) -> JSON { + // TODO: Implement the JSON-LD 1.1 expansion algorithm + return JSON::make_array(); +} + +auto jsonld_expand(const JSON &input, const JSON &, + const JSON::StringView base_iri, + const JSONLDResolver &resolver, const JSONLDVersion version) + -> JSON { + return jsonld_expand(input, base_iri, resolver, version); +} + +} // namespace sourcemeta::core diff --git a/test/jsonld/CMakeLists.txt b/test/jsonld/CMakeLists.txt new file mode 100644 index 000000000..6be876d51 --- /dev/null +++ b/test/jsonld/CMakeLists.txt @@ -0,0 +1,20 @@ +sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME jsonld + SOURCES jsonld_expand_test.cc) + +target_link_libraries(sourcemeta_core_jsonld_unit + PRIVATE sourcemeta::core::jsonld) +target_link_libraries(sourcemeta_core_jsonld_unit + PRIVATE sourcemeta::core::json) + +# W3C JSON-LD Test Suite +# See 
https://github.com/w3c/json-ld-api +sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME jsonld_suite + SOURCES jsonld_suite.cc) +target_compile_definitions(sourcemeta_core_jsonld_suite_unit + PRIVATE JSONLD_SUITE_PATH="${PROJECT_SOURCE_DIR}/vendor/w3c-json-ld/tests") +target_link_libraries(sourcemeta_core_jsonld_suite_unit + PRIVATE sourcemeta::core::jsonld) +target_link_libraries(sourcemeta_core_jsonld_suite_unit + PRIVATE sourcemeta::core::json) +target_link_libraries(sourcemeta_core_jsonld_suite_unit + PRIVATE sourcemeta::core::text) diff --git a/test/jsonld/jsonld_expand_test.cc b/test/jsonld/jsonld_expand_test.cc new file mode 100644 index 000000000..7ce7b499e --- /dev/null +++ b/test/jsonld/jsonld_expand_test.cc @@ -0,0 +1,13 @@ +#include + +#include +#include + +// TODO: Remove this placeholder and replace it with real unit tests once +// jsonld_expand is implemented +TEST(JSONLD_expand, returns_empty_array_for_now) { + const auto document{ + sourcemeta::core::parse_json(R"({ "@id": "https://example.com" })")}; + const auto result{sourcemeta::core::jsonld_expand(document)}; + EXPECT_EQ(result, sourcemeta::core::JSON::make_array()); +} diff --git a/test/jsonld/jsonld_suite.cc b/test/jsonld/jsonld_suite.cc new file mode 100644 index 000000000..8047c9ea6 --- /dev/null +++ b/test/jsonld/jsonld_suite.cc @@ -0,0 +1,170 @@ +#include + +#include +#include +#include + +#include // std::filesystem +#include // std::optional +#include // std::string +#include // std::string_view +#include // std::move + +namespace { + +struct JSONLDExpandCase { + std::filesystem::path suite_root; + sourcemeta::core::JSON::String base_prefix; + std::filesystem::path input; + std::filesystem::path expect; + sourcemeta::core::JSON::String error_code; + sourcemeta::core::JSON::String base_iri; + sourcemeta::core::JSONLDVersion version; + bool negative; + std::optional expand_context; +}; + +class JSONLDExpandTest : public testing::Test { +public: + explicit 
JSONLDExpandTest(JSONLDExpandCase test_case) + : test_case_{std::move(test_case)} {} + + auto TestBody() -> void override { + // TODO: Remove this guard once jsonld_expand is implemented. The harness + // below already drives the full upstream expansion suite, so dropping the + // skip will run every case for real + if (this->pending_) { + GTEST_SKIP() << "jsonld_expand is not yet implemented"; + } + + const auto &test_case{this->test_case_}; + const sourcemeta::core::JSONLDResolver resolver = + [&test_case](const sourcemeta::core::JSON::StringView identifier) + -> std::optional { + if (!identifier.starts_with(test_case.base_prefix)) { + return std::nullopt; + } + const auto path{test_case.suite_root / + identifier.substr(test_case.base_prefix.size())}; + if (!std::filesystem::exists(path)) { + return std::nullopt; + } + return sourcemeta::core::read_json(path); + }; + + const auto input{sourcemeta::core::read_json(test_case.input)}; + + if (test_case.negative) { + try { + if (test_case.expand_context.has_value()) { + const auto context{ + sourcemeta::core::read_json(test_case.expand_context.value())}; + static_cast(sourcemeta::core::jsonld_expand( + input, context, test_case.base_iri, resolver, test_case.version)); + } else { + static_cast(sourcemeta::core::jsonld_expand( + input, test_case.base_iri, resolver, test_case.version)); + } + FAIL() << "Expected error code: " << test_case.error_code; + } catch (const sourcemeta::core::JSONLDError &error) { + EXPECT_EQ(test_case.error_code, error.code()); + } + } else { + const auto expected{sourcemeta::core::read_json(test_case.expect)}; + if (test_case.expand_context.has_value()) { + const auto context{ + sourcemeta::core::read_json(test_case.expand_context.value())}; + EXPECT_EQ(sourcemeta::core::jsonld_expand(input, context, + test_case.base_iri, resolver, + test_case.version), + expected); + } else { + EXPECT_EQ(sourcemeta::core::jsonld_expand(input, test_case.base_iri, + resolver, test_case.version), + expected); + 
} + } + } + +private: + JSONLDExpandCase test_case_; + bool pending_{true}; +}; + +auto sanitize(const std::string_view identifier) -> std::string { + std::string result; + for (const auto character : identifier) { + if (character == '#') { + continue; + } + result += sourcemeta::core::is_alphanum(character) ? character : '_'; + } + return result; +} + +auto register_case(const sourcemeta::core::JSON &entry, + const std::filesystem::path &suite_root, + const sourcemeta::core::JSON::String &base_prefix) -> void { + bool negative{false}; + for (const auto &type : entry.at("@type").as_array()) { + if (type.to_string() == "jld:NegativeEvaluationTest") { + negative = true; + } + } + + const auto &input_relative{entry.at("input").to_string()}; + + JSONLDExpandCase test_case; + test_case.suite_root = suite_root; + test_case.base_prefix = base_prefix; + test_case.input = suite_root / input_relative; + test_case.base_iri = base_prefix + input_relative; + test_case.version = sourcemeta::core::JSONLDVersion::V1_1; + test_case.negative = negative; + + if (entry.defines("option")) { + const auto &option{entry.at("option")}; + if (option.defines("base")) { + test_case.base_iri = option.at("base").to_string(); + } + if ((option.defines("specVersion") && + option.at("specVersion").to_string() == "json-ld-1.0") || + (option.defines("processingMode") && + option.at("processingMode").to_string() == "json-ld-1.0")) { + test_case.version = sourcemeta::core::JSONLDVersion::V1_0; + } + if (option.defines("expandContext")) { + test_case.expand_context = + suite_root / option.at("expandContext").to_string(); + } + } + + if (negative) { + test_case.error_code = entry.at("expectErrorCode").to_string(); + } else { + test_case.expect = suite_root / entry.at("expect").to_string(); + } + + testing::RegisterTest( + "JSONLD_expand", sanitize(entry.at("@id").to_string()).c_str(), nullptr, + nullptr, __FILE__, __LINE__, [test_case]() -> testing::Test * { + return new JSONLDExpandTest(test_case); + 
}); +} + +} // namespace + +auto main(int argc, char **argv) -> int { + testing::InitGoogleTest(&argc, argv); + + const std::filesystem::path suite_root{JSONLD_SUITE_PATH}; + const auto manifest{ + sourcemeta::core::read_json(suite_root / "expand-manifest.jsonld")}; + const auto &base_prefix{manifest.at("baseIri").to_string()}; + + for (const auto &entry : manifest.at("sequence").as_array()) { + register_case(entry, suite_root, base_prefix); + } + + return RUN_ALL_TESTS(); +} diff --git a/test/packaging/find_package/CMakeLists.txt b/test/packaging/find_package/CMakeLists.txt index a79a1fc21..bb01ddc8e 100644 --- a/test/packaging/find_package/CMakeLists.txt +++ b/test/packaging/find_package/CMakeLists.txt @@ -23,6 +23,7 @@ target_link_libraries(core_hello PRIVATE sourcemeta::core::uri) target_link_libraries(core_hello PRIVATE sourcemeta::core::uritemplate) target_link_libraries(core_hello PRIVATE sourcemeta::core::json) target_link_libraries(core_hello PRIVATE sourcemeta::core::jsonpointer) +target_link_libraries(core_hello PRIVATE sourcemeta::core::jsonld) target_link_libraries(core_hello PRIVATE sourcemeta::core::jsonl) target_link_libraries(core_hello PRIVATE sourcemeta::core::yaml) target_link_libraries(core_hello PRIVATE sourcemeta::core::gzip) diff --git a/test/packaging/find_package/hello.cc b/test/packaging/find_package/hello.cc index 8dd59c4be..5b8672aa5 100644 --- a/test/packaging/find_package/hello.cc +++ b/test/packaging/find_package/hello.cc @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include From 892cb34cb8346541f46f77c1c026855158ec0199 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Thu, 25 Jun 2026 19:21:38 -0400 Subject: [PATCH 02/13] WIP Signed-off-by: Juan Cruz Viotti --- src/core/jsonld/CMakeLists.txt | 9 +- .../include/sourcemeta/core/jsonld_error.h | 41 +- src/core/jsonld/jsonld.cc | 78 +- src/core/jsonld/jsonld_algorithms.h | 107 ++ src/core/jsonld/jsonld_context_processing.cc | 258 +++++ 
.../jsonld/jsonld_create_term_definition.cc | 531 ++++++++++ src/core/jsonld/jsonld_expansion.cc | 919 ++++++++++++++++++ src/core/jsonld/jsonld_iri_expansion.cc | 86 ++ src/core/jsonld/jsonld_keywords.h | 124 +++ src/core/jsonld/jsonld_value_expansion.cc | 73 ++ test/jsonld/CMakeLists.txt | 4 +- test/jsonld/jsonld_expand_error_test.cc | 430 ++++++++ test/jsonld/jsonld_expand_test.cc | 231 ++++- test/jsonld/jsonld_suite.cc | 18 +- 14 files changed, 2873 insertions(+), 36 deletions(-) create mode 100644 src/core/jsonld/jsonld_algorithms.h create mode 100644 src/core/jsonld/jsonld_context_processing.cc create mode 100644 src/core/jsonld/jsonld_create_term_definition.cc create mode 100644 src/core/jsonld/jsonld_expansion.cc create mode 100644 src/core/jsonld/jsonld_iri_expansion.cc create mode 100644 src/core/jsonld/jsonld_keywords.h create mode 100644 src/core/jsonld/jsonld_value_expansion.cc create mode 100644 test/jsonld/jsonld_expand_error_test.cc diff --git a/src/core/jsonld/CMakeLists.txt b/src/core/jsonld/CMakeLists.txt index 9e31d2cd1..66152d423 100644 --- a/src/core/jsonld/CMakeLists.txt +++ b/src/core/jsonld/CMakeLists.txt @@ -1,6 +1,9 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME jsonld PRIVATE_HEADERS error.h - SOURCES jsonld.cc) + SOURCES jsonld.cc + jsonld_iri_expansion.cc jsonld_create_term_definition.cc + jsonld_context_processing.cc jsonld_value_expansion.cc jsonld_expansion.cc + jsonld_algorithms.h jsonld_keywords.h) if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME jsonld) @@ -8,7 +11,11 @@ endif() target_link_libraries(sourcemeta_core_jsonld PUBLIC sourcemeta::core::json) +target_link_libraries(sourcemeta_core_jsonld + PUBLIC sourcemeta::core::jsonpointer) target_link_libraries(sourcemeta_core_jsonld PRIVATE sourcemeta::core::uri) target_link_libraries(sourcemeta_core_jsonld PRIVATE sourcemeta::core::langtag) +target_link_libraries(sourcemeta_core_jsonld + PRIVATE sourcemeta::core::text) 
diff --git a/src/core/jsonld/include/sourcemeta/core/jsonld_error.h b/src/core/jsonld/include/sourcemeta/core/jsonld_error.h index e9b38ae5c..a91536f97 100644 --- a/src/core/jsonld/include/sourcemeta/core/jsonld_error.h +++ b/src/core/jsonld/include/sourcemeta/core/jsonld_error.h @@ -5,9 +5,12 @@ #include #endif -#include // std::exception -#include // std::string -#include // std::string_view +#include +#include + +#include // std::exception +#include // std::initializer_list +#include // std::move namespace sourcemeta::core { @@ -20,26 +23,40 @@ namespace sourcemeta::core { /// @ingroup jsonld /// An error that represents a JSON-LD processing failure. The message is one of -/// the error codes defined by the JSON-LD 1.1 API specification, such as -/// `invalid @id value` or `cyclic IRI mapping` +/// the error codes defined by the JSON-LD 1.1 API specification, and the +/// pointer locates the offending position in the input document class SOURCEMETA_CORE_JSONLD_EXPORT JSONLDError : public std::exception { public: - JSONLDError(const char *code) : code_{code} {} - JSONLDError(std::string code) = delete; - JSONLDError(std::string &&code) = delete; - JSONLDError(std::string_view code) = delete; + JSONLDError(const char *code, Pointer pointer) + : code_{code}, pointer_{std::move(pointer)} {} + + // Locate the error at a weak pointer, materialising an owned pointer. + JSONLDError(const char *code, const WeakPointer &pointer) + : code_{code}, pointer_{to_pointer(pointer)} {} + + // Locate the error at a weak pointer extended with the given trailing + // property tokens. 
+ JSONLDError(const char *code, const WeakPointer &pointer, + const std::initializer_list children) + : code_{code}, pointer_{to_pointer(pointer)} { + for (const auto child : children) { + this->pointer_.push_back(JSON::String{child}); + } + } [[nodiscard]] auto what() const noexcept -> const char * override { return this->code_; } - /// Get the JSON-LD error code - [[nodiscard]] auto code() const noexcept -> const char * { - return this->code_; + /// Get the JSON Pointer to the position in the input document that caused the + /// error + [[nodiscard]] auto pointer() const noexcept -> const Pointer & { + return this->pointer_; } private: const char *code_; + Pointer pointer_; }; #if defined(_MSC_VER) diff --git a/src/core/jsonld/jsonld.cc b/src/core/jsonld/jsonld.cc index 3ecb2f1b3..0be074652 100644 --- a/src/core/jsonld/jsonld.cc +++ b/src/core/jsonld/jsonld.cc @@ -1,18 +1,84 @@ +#include #include +#include + +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include // std::nullopt +#include // std::move + +namespace sourcemeta::core { + +// Run the expansion algorithm on a top-level input document and normalise the +// result into the expanded document form (JSON-LD 1.1 API Section 5.1). +auto run_expansion(ExpansionState &state, ActiveContext &active_context, + const JSON &input) -> JSON { + auto expanded{ + expand(state, active_context, std::nullopt, input, empty_weak_pointer)}; + + // A top-level map containing only @graph is replaced by its value. 
+ if (expanded.is_object() && expanded.object_size() == 1 && + expanded.defines(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH)) { + expanded = expanded.at(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH); + } + + if (expanded.is_object()) { + if (expanded.empty() || (expanded.object_size() == 1 && + expanded.defines(KEYWORD_ID, KEYWORD_ID_HASH))) { + return JSON::make_array(); + } + auto result{JSON::make_array()}; + result.push_back(std::move(expanded)); + return result; + } + + if (!expanded.is_array()) { + return JSON::make_array(); + } + + return expanded; +} + +} // namespace sourcemeta::core namespace sourcemeta::core { -auto jsonld_expand(const JSON &, const JSON::StringView, const JSONLDResolver &, - const JSONLDVersion) -> JSON { - // TODO: Implement the JSON-LD 1.1 expansion algorithm - return JSON::make_array(); +auto jsonld_expand(const JSON &input, const JSON::StringView base_iri, + const JSONLDResolver &resolver, const JSONLDVersion version) + -> JSON { + ExpansionState state; + state.resolver = &resolver; + state.processing_1_0 = version == JSONLDVersion::V1_0; + ActiveContext active_context; + if (!base_iri.empty()) { + active_context.base = JSON::String{base_iri}; + state.document_base = JSON::String{base_iri}; + } + return run_expansion(state, active_context, input); } -auto jsonld_expand(const JSON &input, const JSON &, +auto jsonld_expand(const JSON &input, const JSON &expand_context, const JSON::StringView base_iri, const JSONLDResolver &resolver, const JSONLDVersion version) -> JSON { - return jsonld_expand(input, base_iri, resolver, version); + ExpansionState state; + state.resolver = &resolver; + state.processing_1_0 = version == JSONLDVersion::V1_0; + ActiveContext active_context; + if (!base_iri.empty()) { + active_context.base = JSON::String{base_iri}; + state.document_base = JSON::String{base_iri}; + } + const auto &context{ + expand_context.is_object() && + expand_context.defines(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) + ? 
expand_context.at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) + : expand_context}; + // The external expansion context is not part of the input document, so its + // errors carry an empty pointer. + process_context(state, active_context, context, empty_weak_pointer); + return run_expansion(state, active_context, input); } } // namespace sourcemeta::core diff --git a/src/core/jsonld/jsonld_algorithms.h b/src/core/jsonld/jsonld_algorithms.h new file mode 100644 index 000000000..53df79a3f --- /dev/null +++ b/src/core/jsonld/jsonld_algorithms.h @@ -0,0 +1,107 @@ +#ifndef SOURCEMETA_CORE_JSONLD_ALGORITHMS_H_ +#define SOURCEMETA_CORE_JSONLD_ALGORITHMS_H_ + +#include +#include +#include + +#include // std::less +#include // std::map +#include // std::shared_ptr +#include // std::optional +#include // std::vector + +namespace sourcemeta::core { + +struct TermDefinition { + std::optional iri; + std::optional type_mapping; + std::vector container; + std::optional language; + bool has_language{false}; + std::optional direction; + bool has_direction{false}; + std::optional context; + std::optional context_base; + std::optional index; + bool reverse{false}; + bool prefix{false}; + bool is_protected{false}; +}; + +struct ActiveContext { + std::map> terms; + std::optional base; + std::optional vocabulary; + std::optional default_language; + std::optional default_direction; + std::shared_ptr previous; +}; + +// The mutable state shared by the expansion algorithms for the duration of a +// single top-level expansion. +struct ExpansionState { + // Used to load remote contexts. The chain detects recursive inclusion. + const JSONLDResolver *resolver{nullptr}; + std::vector remote_context_chain; + std::optional document_base; + // When a scoped context is processed after the fact, remote references in it + // resolve against the URL of the document that defined the term. + std::optional context_base_override; + // Protected-term state for the context currently being processed. 
+ bool context_protected{false}; + bool protected_override{false}; + bool processing_1_0{false}; + + // Remote context references resolve against the URL of the document that + // contains them: the current remote context if any, otherwise the input + // document. This is distinct from the active context base, which @base + // mutates. + [[nodiscard]] auto context_resolution_base() const + -> std::optional { + if (!this->remote_context_chain.empty()) { + return this->remote_context_chain.back(); + } + if (this->context_base_override.has_value()) { + return this->context_base_override; + } + return this->document_base; + } +}; + +// Tracks, while a context is being processed, which terms have been fully +// defined (true) versus are still being defined (false, used to detect cycles). +using DefinedTerms = std::map; + +// IRI Expansion (JSON-LD 1.1 API Section 5.2) +auto expand_iri(ExpansionState &state, ActiveContext &active_context, + const JSON::String &value, const bool document_relative, + const bool vocabulary, const JSON *const local_context, + DefinedTerms *const defined, const WeakPointer &context_pointer) + -> std::optional; + +// Create Term Definition (JSON-LD 1.1 API Section 5.1.1) +auto create_term_definition(ExpansionState &state, + ActiveContext &active_context, + const JSON &local_context, const JSON::String &term, + DefinedTerms &defined, + const WeakPointer &context_pointer) -> void; + +// Context Processing (JSON-LD 1.1 API Section 5.1) +auto process_context(ExpansionState &state, ActiveContext &active_context, + const JSON &local_context, const WeakPointer &pointer, + const bool propagate = true) -> void; + +// Value Expansion (JSON-LD 1.1 API Section 5.3.3) +auto expand_value(ExpansionState &state, ActiveContext &active_context, + const std::optional &active_property, + const JSON &value) -> JSON; + +// Expansion (JSON-LD 1.1 API Section 5.1.2) +auto expand(ExpansionState &state, ActiveContext &active_context, + const std::optional 
&active_property,
+            const JSON &element, const WeakPointer &pointer) -> JSON;
+
+} // namespace sourcemeta::core
+
+#endif
diff --git a/src/core/jsonld/jsonld_context_processing.cc b/src/core/jsonld/jsonld_context_processing.cc
new file mode 100644
index 000000000..c557f5288
--- /dev/null
+++ b/src/core/jsonld/jsonld_context_processing.cc
@@ -0,0 +1,286 @@
+#include "jsonld_algorithms.h"
+#include "jsonld_keywords.h"
+
+#include <sourcemeta/core/uri.h>
+
+#include <memory>   // std::make_shared
+#include <optional> // std::optional
+#include <utility>  // std::move, std::pair
+#include <vector>   // std::vector
+
+namespace sourcemeta::core {
+
+namespace {
+
+// Restores the context-wide @protected flag of the expansion state to the
+// value it had on construction when the scope is left, whether normally or
+// through an exception. Callers such as the eager scoped context validation
+// in create_term_definition discard some context processing errors and keep
+// using the state, so the flag must never stay modified after a failure.
+class ContextProtectedGuard {
+public:
+  explicit ContextProtectedGuard(ExpansionState &state)
+      : state_{state}, saved_{state.context_protected} {}
+  ~ContextProtectedGuard() { this->state_.context_protected = this->saved_; }
+
+  ContextProtectedGuard(const ContextProtectedGuard &) = delete;
+  auto operator=(const ContextProtectedGuard &)
+      -> ContextProtectedGuard & = delete;
+
+private:
+  ExpansionState &state_;
+  const bool saved_;
+};
+
+} // namespace
+
+// Context Processing (JSON-LD 1.1 API Section 5.1). Applies the local context,
+// which is either a single entry or an array of entries, to the active context
+// in place. A null entry resets the active context, a string entry is loaded
+// through the resolver and processed recursively, and a map entry is applied
+// keyword by keyword before its term definitions are created. A JSONLDError
+// located at the offending entry is thrown on any violation.
+auto process_context(ExpansionState &state, ActiveContext &active_context,
+                     const JSON &local_context, const WeakPointer &pointer,
+                     const bool propagate) -> void {
+  std::vector<std::pair<const JSON *, WeakPointer>> contexts;
+  if (local_context.is_array()) {
+    std::size_t index{0};
+    for (const auto &item : local_context.as_array()) {
+      contexts.emplace_back(&item, pointer.concat(index));
+      index += 1;
+    }
+  } else {
+    contexts.emplace_back(&local_context, pointer);
+  }
+
+  for (const auto &[entry_pointer, location] : contexts) {
+    const auto &context{*entry_pointer};
+    if (context.is_null()) {
+      if (!state.protected_override) {
+        for (const auto &entry : active_context.terms) {
+          if (entry.second.is_protected) {
+            throw JSONLDError("Invalid context nullification", location);
+          }
+        }
+      }
+      // Nullifying the context resets to the initial context, whose base is
+      // the document base.
+      active_context = ActiveContext{};
+      active_context.base = state.document_base;
+      continue;
+    }
+
+    if (context.is_string()) {
+      auto reference{context.to_string()};
+      const auto resolution_base{state.context_resolution_base()};
+      if (resolution_base.has_value()) {
+        reference = URI::from_iri(reference)
+                        .resolve_from(URI::from_iri(resolution_base.value()))
+                        .recompose();
+      }
+      for (const auto &loaded : state.remote_context_chain) {
+        if (loaded == reference) {
+          throw JSONLDError("Recursive context inclusion", location);
+        }
+      }
+      if (state.resolver == nullptr || !*state.resolver) {
+        throw JSONLDError("Loading remote context failed", location);
+      }
+      const auto document{(*state.resolver)(reference)};
+      if (!document.has_value()) {
+        throw JSONLDError("Loading remote context failed", location);
+      }
+      const auto *context_entry{
+          document->is_object()
+              ? document->try_at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH)
+              : nullptr};
+      if (context_entry == nullptr) {
+        throw JSONLDError("Invalid remote context", location);
+      }
+      state.remote_context_chain.push_back(reference);
+      try {
+        process_context(state, active_context, *context_entry, location,
+                        propagate);
+      } catch (...) {
+        state.remote_context_chain.pop_back();
+        throw;
+      }
+      state.remote_context_chain.pop_back();
+      continue;
+    }
+
+    if (!context.is_object()) {
+      throw JSONLDError("Invalid local context", location);
+    }
+
+    if (state.processing_1_0 &&
+        context.defines(KEYWORD_VERSION, KEYWORD_VERSION_HASH)) {
+      throw JSONLDError("Processing mode conflict", location,
+                        {KEYWORD_VERSION});
+    }
+    if (const auto *version{
+            context.try_at(KEYWORD_VERSION, KEYWORD_VERSION_HASH)};
+        version != nullptr &&
+        (!version->is_real() || version->to_real() != 1.1)) {
+      throw JSONLDError("Invalid @version value", location, {KEYWORD_VERSION});
+    }
+    if (state.processing_1_0 &&
+        (context.defines(KEYWORD_PROPAGATE, KEYWORD_PROPAGATE_HASH) ||
+         context.defines(KEYWORD_IMPORT, KEYWORD_IMPORT_HASH) ||
+         context.defines(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH))) {
+      throw JSONLDError("Invalid context entry", location);
+    }
+
+    bool effective_propagate{propagate};
+    if (const auto *propagate_entry{
+            context.try_at(KEYWORD_PROPAGATE, KEYWORD_PROPAGATE_HASH)}) {
+      if (!propagate_entry->is_boolean()) {
+        throw JSONLDError("Invalid @propagate value", location,
+                          {KEYWORD_PROPAGATE});
+      }
+      effective_propagate = propagate_entry->to_boolean();
+    }
+    if (!effective_propagate && !active_context.previous) {
+      auto snapshot{std::make_shared<ActiveContext>(active_context)};
+      snapshot->previous = nullptr;
+      active_context.previous = snapshot;
+    }
+
+    // @protected applies to imported terms too, so it is set before @import.
+    // The guard restores the previous value once this entry is done, including
+    // when processing it throws.
+    const ContextProtectedGuard protected_guard{state};
+    if (const auto *protected_entry{
+            context.try_at(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH)}) {
+      if (!protected_entry->is_boolean()) {
+        throw JSONLDError("Invalid @protected value", location,
+                          {KEYWORD_PROTECTED});
+      }
+      state.context_protected = protected_entry->to_boolean();
+    }
+
+    if (const auto *import_entry{
+            context.try_at(KEYWORD_IMPORT, KEYWORD_IMPORT_HASH)}) {
+      const auto &import{*import_entry};
+      if (!import.is_string()) {
+        throw JSONLDError("Invalid @import value", location, {KEYWORD_IMPORT});
+      }
+      auto reference{import.to_string()};
+      const auto resolution_base{state.context_resolution_base()};
+      if (resolution_base.has_value()) {
+        reference = URI::from_iri(reference)
+                        .resolve_from(URI::from_iri(resolution_base.value()))
+                        .recompose();
+      }
+      if (state.resolver == nullptr || !*state.resolver) {
+        throw JSONLDError("Loading remote context failed", location,
+                          {KEYWORD_IMPORT});
+      }
+      const auto document{(*state.resolver)(reference)};
+      const auto *imported_context{
+          document.has_value() && document->is_object()
+              ? document->try_at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH)
+              : nullptr};
+      if (imported_context == nullptr || !imported_context->is_object()) {
+        throw JSONLDError("Invalid remote context", location, {KEYWORD_IMPORT});
+      }
+      if (imported_context->defines(KEYWORD_IMPORT, KEYWORD_IMPORT_HASH)) {
+        throw JSONLDError("Invalid context entry", location, {KEYWORD_IMPORT});
+      }
+      // Merge the imported entries with the current ones, the current ones
+      // overriding, and process the result as a single context.
+      auto merged{JSON{*imported_context}};
+      for (const auto &entry : context.as_object()) {
+        if (JSON::StringView{entry.first} != KEYWORD_IMPORT) {
+          merged.assign(entry.first, entry.second);
+        }
+      }
+      process_context(state, active_context, merged, location, propagate);
+      continue;
+    }
+
+    if (const auto *base_entry{
+            context.try_at(KEYWORD_BASE, KEYWORD_BASE_HASH)}) {
+      const auto &base{*base_entry};
+      if (base.is_null()) {
+        active_context.base = std::nullopt;
+      } else if (!base.is_string()) {
+        throw JSONLDError("Invalid base IRI", location, {KEYWORD_BASE});
+      } else if (active_context.base.has_value()) {
+        active_context.base =
+            URI::from_iri(base.to_string())
+                .resolve_from(URI::from_iri(active_context.base.value()))
+                .recompose();
+      } else {
+        active_context.base = base.to_string();
+      }
+    }
+
+    if (const auto *vocabulary_entry{
+            context.try_at(KEYWORD_VOCAB, KEYWORD_VOCAB_HASH)}) {
+      const auto &vocabulary{*vocabulary_entry};
+      if (vocabulary.is_null()) {
+        active_context.vocabulary = std::nullopt;
+      } else if (!vocabulary.is_string()) {
+        throw JSONLDError("Invalid vocab mapping", location, {KEYWORD_VOCAB});
+      } else {
+        const auto &vocabulary_string{vocabulary.to_string()};
+        // In 1.0, @vocab must be an absolute IRI or blank node identifier.
+        if (state.processing_1_0 &&
+            vocabulary_string.find(':') == JSON::String::npos) {
+          throw JSONLDError("Invalid vocab mapping", location, {KEYWORD_VOCAB});
+        }
+        active_context.vocabulary =
+            expand_iri(state, active_context, vocabulary_string, true, true,
+                       nullptr, nullptr, empty_weak_pointer);
+      }
+    }
+
+    if (const auto *language_entry{
+            context.try_at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH)}) {
+      const auto &language{*language_entry};
+      if (language.is_null()) {
+        active_context.default_language = std::nullopt;
+      } else if (!language.is_string()) {
+        throw JSONLDError("Invalid default language", location,
+                          {KEYWORD_LANGUAGE});
+      } else {
+        active_context.default_language = language.to_string();
+      }
+    }
+
+    if (const auto *direction_entry{
+            context.try_at(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH)}) {
+      const auto &direction{*direction_entry};
+      if (direction.is_null()) {
+        active_context.default_direction = std::nullopt;
+      } else if (!direction.is_string()) {
+        throw JSONLDError("Invalid base direction", location,
+                          {KEYWORD_DIRECTION});
+      } else {
+        const auto &direction_string{direction.to_string()};
+        if (direction_string != "ltr" && direction_string != "rtl") {
+          throw JSONLDError("Invalid base direction", location,
+                            {KEYWORD_DIRECTION});
+        }
+        active_context.default_direction = direction_string;
+      }
+    }
+
+    DefinedTerms defined;
+    for (const auto &entry : context.as_object()) {
+      const auto &name{entry.first};
+      if (name == KEYWORD_BASE || name == KEYWORD_VOCAB ||
+          name == KEYWORD_LANGUAGE || name == KEYWORD_VERSION ||
+          name == KEYWORD_DIRECTION || name == KEYWORD_IMPORT ||
+          name == KEYWORD_PROPAGATE || name == KEYWORD_PROTECTED) {
+        continue;
+      }
+      create_term_definition(state, active_context, context, name, defined,
+                             location);
+    }
+  }
+}
+
+} // namespace sourcemeta::core
diff --git a/src/core/jsonld/jsonld_create_term_definition.cc b/src/core/jsonld/jsonld_create_term_definition.cc
new file mode
100644
index 000000000..422ab726e
--- /dev/null
+++ b/src/core/jsonld/jsonld_create_term_definition.cc
@@ -0,0 +1,548 @@
+#include "jsonld_algorithms.h"
+#include "jsonld_keywords.h"
+
+#include <sourcemeta/core/uri.h>
+
+#include <optional> // std::optional
+#include <utility>  // std::move
+
+namespace sourcemeta::core {
+
+namespace {
+
+// Whether the given value is one of the keywords accepted by @container.
+auto is_valid_container(const JSON::StringView value) -> bool {
+  if (value == KEYWORD_LIST || value == KEYWORD_SET || value == KEYWORD_INDEX) {
+    return true;
+  }
+  if (value == KEYWORD_LANGUAGE || value == KEYWORD_ID) {
+    return true;
+  }
+  return value == KEYWORD_TYPE || value == KEYWORD_GRAPH;
+}
+
+// Whether the final character of the given value is a URI generic delimiter
+// (RFC 3986). An empty value has no final character, so it never matches.
+auto ends_with_gen_delim(const JSON::StringView value) -> bool {
+  if (value.empty()) {
+    return false;
+  }
+  return URI::is_gen_delim(value.back());
+}
+
+// Whether two term definitions agree on everything a protected-term
+// redefinition check cares about. The is_protected flag is deliberately left
+// out of the comparison.
+auto same_definition(const TermDefinition &left, const TermDefinition &right)
+    -> bool {
+  // The IRI mapping, the type mapping, and the container must agree.
+  if (!(left.iri == right.iri && left.type_mapping == right.type_mapping &&
+        left.container == right.container)) {
+    return false;
+  }
+  // So must the language and direction, including whether each was declared.
+  if (!(left.language == right.language &&
+        left.has_language == right.has_language &&
+        left.direction == right.direction &&
+        left.has_direction == right.has_direction)) {
+    return false;
+  }
+  // And finally the scoped context, the index, and the remaining flags.
+  return left.context == right.context && left.index == right.index &&
+         left.reverse == right.reverse && left.prefix == right.prefix;
+}
+
+// Store a freshly-built term definition, enforcing protected-term redefinition.
+auto finalize_definition(ExpansionState &state, ActiveContext &active_context, + DefinedTerms &defined, const JSON::String &term, + const WeakPointer &term_pointer, + const std::optional &previous, + TermDefinition &&candidate) -> void { + if (previous.has_value() && previous->is_protected && + !state.protected_override) { + if (!same_definition(previous.value(), candidate)) { + throw JSONLDError("Protected term redefinition", term_pointer); + } + // A redefinition with the same definition retains the protected flag. + candidate.is_protected = true; + } + active_context.terms[term] = std::move(candidate); + defined[term] = true; +} + +} // namespace + +// Create Term Definition (JSON-LD 1.1 API Section 5.1.1) +auto create_term_definition(ExpansionState &state, + ActiveContext &active_context, + const JSON &local_context, const JSON::String &term, + DefinedTerms &defined, + const WeakPointer &context_pointer) -> void { + const auto status{defined.find(term)}; + if (status != defined.cend()) { + if (status->second) { + return; + } + throw JSONLDError("Cyclic IRI mapping", context_pointer.concat(term)); + } + + if (term.empty()) { + throw JSONLDError("Invalid term definition", context_pointer); + } + + defined[term] = false; + const auto &value{local_context.at(term)}; + const WeakPointer term_pointer{context_pointer.concat(term)}; + + if (is_keyword(term)) { + if (term == KEYWORD_TYPE && value.is_object() && !state.processing_1_0) { + TermDefinition type_definition; + bool has_container{false}; + bool invalid_entry{false}; + for (const auto &entry : value.as_object()) { + const auto &name{entry.first}; + if (name == KEYWORD_PROTECTED) { + if (!entry.second.is_boolean()) { + throw JSONLDError("Invalid @protected value", term_pointer, + {KEYWORD_PROTECTED}); + } + type_definition.is_protected = entry.second.to_boolean(); + } else if (name == KEYWORD_CONTAINER && entry.second.is_string()) { + const auto &container{entry.second.to_string()}; + if (container == 
KEYWORD_SET || container == KEYWORD_ID) { + type_definition.container.push_back(container); + has_container = true; + } else { + invalid_entry = true; + } + } else { + invalid_entry = true; + } + } + // A redefinition of a protected @type is rejected before the shape of + // the new definition is validated. + const auto existing_type{active_context.terms.find(KEYWORD_TYPE)}; + if (existing_type != active_context.terms.cend() && + existing_type->second.is_protected && !state.protected_override) { + if (!same_definition(existing_type->second, type_definition)) { + throw JSONLDError("Protected term redefinition", term_pointer); + } + type_definition.is_protected = true; + } else if (invalid_entry || !has_container) { + throw JSONLDError("Keyword redefinition", term_pointer); + } else if (!type_definition.is_protected) { + type_definition.is_protected = state.context_protected; + } + active_context.terms[JSON::String{KEYWORD_TYPE}] = + std::move(type_definition); + defined[term] = true; + return; + } + throw JSONLDError("Keyword redefinition", term_pointer); + } + + if (has_keyword_form(term)) { + defined[term] = true; + return; + } + + std::optional previous; + const auto existing{active_context.terms.find(term)}; + if (existing != active_context.terms.cend()) { + previous = existing->second; + } + active_context.terms.erase(term); + + const auto *id_entry{ + value.is_object() ? 
value.try_at(KEYWORD_ID, KEYWORD_ID_HASH) : nullptr}; + if (value.is_null() || (id_entry != nullptr && id_entry->is_null())) { + TermDefinition empty; + empty.is_protected = state.context_protected; + finalize_definition(state, active_context, defined, term, term_pointer, + previous, std::move(empty)); + return; + } + + TermDefinition definition; + definition.is_protected = state.context_protected; + bool simple_term{false}; + + if (value.is_string()) { + simple_term = true; + const auto &string_value{value.to_string()}; + if (!is_keyword(string_value) && has_keyword_form(string_value)) { + defined[term] = true; + return; + } + if (string_value == term) { + // A self-referential simple term resolves through the term itself. + const auto colon{term.find(':')}; + if (colon != JSON::String::npos) { + const auto prefix{term.substr(0, colon)}; + const auto suffix{term.substr(colon + 1)}; + if (prefix != "_" && !suffix.starts_with("//") && + local_context.is_object() && local_context.defines(prefix)) { + const auto iterator{defined.find(prefix)}; + if (iterator == defined.cend() || !iterator->second) { + create_term_definition(state, active_context, local_context, prefix, + defined, context_pointer); + } + } + const auto prefix_definition{active_context.terms.find(prefix)}; + if (prefix_definition != active_context.terms.cend() && + prefix_definition->second.iri.has_value()) { + definition.iri = prefix_definition->second.iri.value() + suffix; + } else { + definition.iri = term; + } + } else if (term.find('/') != JSON::String::npos) { + definition.iri = expand_iri(state, active_context, term, false, true, + nullptr, nullptr, empty_weak_pointer); + } else if (active_context.vocabulary.has_value()) { + definition.iri = active_context.vocabulary.value() + term; + } + } else { + definition.iri = + expand_iri(state, active_context, string_value, false, true, + &local_context, &defined, context_pointer); + // In 1.1, an IRI-like term must expand to its IRI mapping. 
+ if (!state.processing_1_0 && definition.iri.has_value()) { + const auto colon_position{term.find(':')}; + const bool iri_like_colon{colon_position != JSON::String::npos && + colon_position != 0 && + colon_position + 1 != term.size()}; + if (iri_like_colon || term.find('/') != JSON::String::npos) { + auto probe{active_context}; + const auto expanded_term{expand_iri(state, probe, term, false, true, + nullptr, nullptr, + empty_weak_pointer)}; + if (expanded_term.has_value() && expanded_term != definition.iri) { + throw JSONLDError("Invalid IRI mapping", term_pointer); + } + } + } + } + } else if (value.is_object()) { + const bool has_id{id_entry != nullptr}; + const JSON *const id{id_entry}; + if (const auto *reverse_entry{ + value.try_at(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)}) { + if (has_id || value.defines(KEYWORD_NEST, KEYWORD_NEST_HASH)) { + throw JSONLDError("Invalid reverse property", term_pointer, + {KEYWORD_REVERSE}); + } + const auto &reverse{*reverse_entry}; + if (!reverse.is_string()) { + throw JSONLDError("Invalid IRI mapping", term_pointer, + {KEYWORD_REVERSE}); + } + definition.reverse = true; + definition.iri = + expand_iri(state, active_context, reverse.to_string(), false, true, + &local_context, &defined, context_pointer); + if (!definition.iri.has_value()) { + // A reverse value with the form of a keyword is ignored. 
+ defined[term] = true; + return; + } + if (definition.iri.value().find(':') == JSON::String::npos) { + throw JSONLDError("Invalid IRI mapping", term_pointer, + {KEYWORD_REVERSE}); + } + } else if (has_id && !id->is_null() && + (!id->is_string() || id->to_string() != term)) { + if (!id->is_string()) { + throw JSONLDError("Invalid IRI mapping", term_pointer, {KEYWORD_ID}); + } + const auto &id_value{id->to_string()}; + if (!is_keyword(id_value) && has_keyword_form(id_value)) { + defined[term] = true; + return; + } + definition.iri = expand_iri(state, active_context, id_value, false, true, + &local_context, &defined, context_pointer); + const auto &mapping{definition.iri}; + if (!mapping.has_value() || + (!is_keyword(mapping.value()) && + mapping.value().find(':') == JSON::String::npos && + !active_context.vocabulary.has_value())) { + throw JSONLDError("Invalid IRI mapping", term_pointer, {KEYWORD_ID}); + } + if (mapping.has_value() && mapping.value() == KEYWORD_CONTEXT) { + throw JSONLDError("Invalid keyword alias", term_pointer, {KEYWORD_ID}); + } + // In 1.1, a term that itself has the form of an IRI (a colon other than + // at the edges, or a slash) must expand to its IRI mapping. 
+ if (!state.processing_1_0 && mapping.has_value()) { + const auto colon_position{term.find(':')}; + const bool iri_like_colon{colon_position != JSON::String::npos && + colon_position != 0 && + colon_position + 1 != term.size()}; + if (iri_like_colon || term.find('/') != JSON::String::npos) { + auto probe{active_context}; + const auto expanded_term{expand_iri(state, probe, term, false, true, + nullptr, nullptr, + empty_weak_pointer)}; + if (expanded_term.has_value() && expanded_term != mapping) { + throw JSONLDError("Invalid IRI mapping", term_pointer, + {KEYWORD_ID}); + } + } + } + } else if (term.find(':') != JSON::String::npos && !term.starts_with(':') && + !term.ends_with(':')) { + const auto colon{term.find(':')}; + const auto prefix{term.substr(0, colon)}; + const auto suffix{term.substr(colon + 1)}; + if (prefix != "_" && !suffix.starts_with("//") && + local_context.is_object() && local_context.defines(prefix)) { + const auto iterator{defined.find(prefix)}; + if (iterator == defined.cend() || !iterator->second) { + create_term_definition(state, active_context, local_context, prefix, + defined, context_pointer); + } + } + const auto prefix_definition{active_context.terms.find(prefix)}; + if (prefix_definition != active_context.terms.cend() && + prefix_definition->second.iri.has_value()) { + definition.iri = prefix_definition->second.iri.value() + suffix; + } else { + definition.iri = term; + } + } else if (term.find('/') != JSON::String::npos) { + definition.iri = expand_iri(state, active_context, term, false, true, + nullptr, nullptr, empty_weak_pointer); + } else if (active_context.vocabulary.has_value()) { + definition.iri = active_context.vocabulary.value() + term; + } + + if (const auto *type_entry{value.try_at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)}) { + const auto &type_value{*type_entry}; + if (!type_value.is_string()) { + throw JSONLDError("Invalid type mapping", term_pointer, {KEYWORD_TYPE}); + } + const auto type{expand_iri(state, active_context, 
type_value.to_string(), + false, true, &local_context, &defined, + context_pointer)}; + if (!type.has_value() || type.value().starts_with("_:") || + (type.value() != KEYWORD_ID && type.value() != KEYWORD_VOCAB && + type.value() != KEYWORD_JSON && type.value() != KEYWORD_NONE && + type.value().find(':') == JSON::String::npos) || + (state.processing_1_0 && + (type.value() == KEYWORD_JSON || type.value() == KEYWORD_NONE))) { + throw JSONLDError("Invalid type mapping", term_pointer, {KEYWORD_TYPE}); + } + definition.type_mapping = type; + } + + if (const auto *container_entry{ + value.try_at(KEYWORD_CONTAINER, KEYWORD_CONTAINER_HASH)}) { + const auto &container{*container_entry}; + if (container.is_array()) { + // Array containers are a 1.1 feature. + if (state.processing_1_0) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + for (const auto &item : container.as_array()) { + if (!item.is_string()) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + const auto &item_string{item.to_string()}; + if (!is_valid_container(item_string)) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + definition.container.push_back(item_string); + } + } else if (container.is_string()) { + const auto &container_string{container.to_string()}; + // In 1.0, only @list, @set and @index are permitted. 
+ if (state.processing_1_0 && container_string != KEYWORD_LIST && + container_string != KEYWORD_SET && + container_string != KEYWORD_INDEX) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + if (!is_valid_container(container_string)) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + definition.container.push_back(container_string); + } else { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + if (definition.reverse) { + for (const auto &item : definition.container) { + if (item != KEYWORD_SET && item != KEYWORD_INDEX) { + throw JSONLDError("Invalid reverse property", term_pointer, + {KEYWORD_CONTAINER}); + } + } + } + bool has_list{false}; + bool has_type{false}; + for (const auto &item : definition.container) { + if (item == KEYWORD_LIST) { + has_list = true; + } else if (item == KEYWORD_TYPE) { + has_type = true; + } + } + if (has_list && definition.container.size() > 1) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + // A type-map container may only coerce its keys to identifiers. 
+ if (has_type && definition.type_mapping.has_value() && + definition.type_mapping.value() != KEYWORD_ID && + definition.type_mapping.value() != KEYWORD_VOCAB) { + throw JSONLDError("Invalid type mapping", term_pointer, {KEYWORD_TYPE}); + } + } + + if (const auto *language_entry{ + value.try_at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH)}; + language_entry != nullptr && + !value.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + const auto &language{*language_entry}; + if (!language.is_null() && !language.is_string()) { + throw JSONLDError("Invalid language mapping", term_pointer, + {KEYWORD_LANGUAGE}); + } + definition.has_language = true; + if (language.is_string()) { + definition.language = language.to_string(); + } + } + + if (const auto *direction_entry{ + value.try_at(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH)}; + direction_entry != nullptr && + !value.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + const auto &direction{*direction_entry}; + definition.has_direction = true; + if (direction.is_string()) { + definition.direction = direction.to_string(); + } + } + + if (const auto *context_entry{ + value.try_at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH)}) { + if (state.processing_1_0) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_CONTEXT}); + } + // Validate the scoped context eagerly so that errors surface even when + // the term is never used. Remote scoped contexts (including recursive + // ones) are validated lazily when the term is used instead. + const bool saved_override{state.protected_override}; + try { + // The error raised here is always discarded below, so its location does + // not matter. 
+ ActiveContext probe{active_context}; + state.protected_override = true; + process_context(state, probe, *context_entry, empty_weak_pointer); + state.protected_override = saved_override; + } catch (const JSONLDError &error) { + state.protected_override = saved_override; + const JSON::StringView code{error.what()}; + if (code != "Loading remote context failed" && + code != "Recursive context inclusion" && + code != "Invalid remote context") { + throw JSONLDError("Invalid scoped context", term_pointer, + {KEYWORD_CONTEXT}); + } + } + definition.context = *context_entry; + definition.context_base = state.context_resolution_base(); + } + + if (const auto *prefix_entry{ + value.try_at(KEYWORD_PREFIX, KEYWORD_PREFIX_HASH)}) { + if (!prefix_entry->is_boolean()) { + throw JSONLDError("Invalid @prefix value", term_pointer, + {KEYWORD_PREFIX}); + } + definition.prefix = prefix_entry->to_boolean(); + if (definition.prefix && (term.find(':') != JSON::String::npos || + term.find('/') != JSON::String::npos)) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_PREFIX}); + } + if (definition.prefix && definition.iri.has_value() && + is_keyword(definition.iri.value())) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_PREFIX}); + } + } + + if (const auto *nest_entry{value.try_at(KEYWORD_NEST, KEYWORD_NEST_HASH)}) { + const auto &nest{*nest_entry}; + if (!nest.is_string()) { + throw JSONLDError("Invalid @nest value", term_pointer, {KEYWORD_NEST}); + } + const auto &nest_string{nest.to_string()}; + if (is_keyword(nest_string) && nest_string != KEYWORD_NEST) { + throw JSONLDError("Invalid @nest value", term_pointer, {KEYWORD_NEST}); + } + } + + if (const auto *index_entry{ + value.try_at(KEYWORD_INDEX, KEYWORD_INDEX_HASH)}) { + if (state.processing_1_0) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_INDEX}); + } + bool has_index_container{false}; + for (const auto &item : definition.container) { + if (item == 
KEYWORD_INDEX) {
+          has_index_container = true;
+        }
+      }
+      const auto &index{*index_entry};
+      if (!index.is_string() || !has_index_container) {
+        throw JSONLDError("Invalid term definition", term_pointer,
+                          {KEYWORD_INDEX});
+      }
+      const auto &index_string{index.to_string()};
+      const auto index_iri{expand_iri(state, active_context, index_string,
+                                      false, true, &local_context, &defined,
+                                      context_pointer)};
+      if (!index_iri.has_value() || is_keyword(index_iri.value())) {
+        throw JSONLDError("Invalid term definition", term_pointer,
+                          {KEYWORD_INDEX});
+      }
+      definition.index = index_string;
+    }
+
+    if (const auto *protected_entry{
+            value.try_at(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH)}) {
+      if (!protected_entry->is_boolean()) {
+        throw JSONLDError("Invalid @protected value", term_pointer,
+                          {KEYWORD_PROTECTED});
+      }
+      definition.is_protected = protected_entry->to_boolean();
+    }
+  } else {
+    throw JSONLDError("Invalid term definition", term_pointer);
+  }
+
+  if (simple_term && term.find(':') == JSON::String::npos &&
+      term.find('/') == JSON::String::npos && definition.iri.has_value() &&
+      (ends_with_gen_delim(definition.iri.value()) ||
+       definition.iri.value().starts_with("_:"))) {
+    definition.prefix = true;
+  }
+
+  if (!definition.reverse && !definition.iri.has_value()) {
+    throw JSONLDError("Invalid IRI mapping", term_pointer);
+  }
+
+  finalize_definition(state, active_context, defined, term, term_pointer,
+                      previous, std::move(definition));
+}
+
+} // namespace sourcemeta::core
diff --git a/src/core/jsonld/jsonld_expansion.cc b/src/core/jsonld/jsonld_expansion.cc
new file mode 100644
index 000000000..0df961c25
--- /dev/null
+++ b/src/core/jsonld/jsonld_expansion.cc
@@ -0,0 +1,925 @@
+#include "jsonld_algorithms.h"
+#include "jsonld_keywords.h"
+
+#include <algorithm> // std::ranges::sort
+#include <cstddef>   // std::size_t
+#include <optional>  // std::optional
+#include <utility>   // std::move, std::pair
+#include <vector>    // std::vector
+
+namespace sourcemeta::core {
+
+namespace {
+
+// Wrap the value in a single-element array, unless it already is an array, in
+// which case it is returned as is.
+auto into_array(JSON &&value) -> JSON {
+  if (value.is_array()) {
+    return std::move(value);
+  }
+  auto result{JSON::make_array()};
+  result.push_back(std::move(value));
+  return result;
+}
+
+// The entries of an object in sorted key order, which is the order expansion
+// uses so that values merged from several keys are deterministic. The keys and
+// values are referenced from the object (which must outlive the result), never
+// copied.
+auto sorted_entries(const JSON &object)
+    -> std::vector<std::pair<const JSON::String *, const JSON *>> {
+  std::vector<std::pair<const JSON::String *, const JSON *>> entries;
+  for (const auto &entry : object.as_object()) {
+    entries.emplace_back(&entry.first, &entry.second);
+  }
+  std::ranges::sort(entries, [](const auto &left, const auto &right) -> bool {
+    return *left.first < *right.first;
+  });
+  return entries;
+}
+
+// Append the values, which must be an array, into the array stored at the given
+// key, creating it if absent. The values are consumed: each item is moved into
+// the destination rather than copied, and the destination is looked up once.
+auto merge(JSON &object, const JSON::StringView name, JSON &&values) -> void {
+  if (object.defines(name)) {
+    auto &target{object.at(name)};
+    for (auto &item : values.as_array()) {
+      target.push_back(std::move(item));
+    }
+  } else {
+    object.assign(name, std::move(values));
+  }
+}
+
+// Whether the container mapping of the definition includes the given keyword.
+// A null definition has no container mapping.
+auto container_includes(const TermDefinition *const definition,
+                        const JSON::StringView name) -> bool {
+  if (definition == nullptr) {
+    return false;
+  }
+  for (const auto &entry : definition->container) {
+    if (entry == name) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Expand a single @type value against the context that preceded type-scoped
+// processing.
+auto expand_type(ExpansionState &state, const ActiveContext &type_context,
+                 const JSON &item) -> JSON {
+  auto context{type_context};
+  const auto type{expand_iri(state, context, item.to_string(), true, true,
+                             nullptr, nullptr, empty_weak_pointer)};
+  return type.has_value() ? JSON{type.value()} : JSON{nullptr};
+}
+
+// Expand the direct (and deferred @nest) entries of a map into the result,
+// mutating it in place. Mutually recursive with expand_object.
+auto expand_entries(ExpansionState &state, ActiveContext &active_context, + const ActiveContext &type_context, JSON &result, + const std::optional &active_property, + const JSON &source, const WeakPointer &source_pointer) + -> void; + +// Expand a map element: the node-object (and value-object) branch of the +// Expansion algorithm, factored out of expand() below. +auto expand_object(ExpansionState &state, ActiveContext active_context, + const std::optional &active_property, + const JSON &element, const WeakPointer &pointer) -> JSON { + auto result{JSON::make_object()}; + + // @type values are expanded against the context before type-scoped contexts + // are applied. + const ActiveContext type_context{active_context}; + + // Type-scoped contexts (JSON-LD 1.1 API Section 5.1.2 step 11). The values + // are referenced from the input element, never copied. + std::vector type_values; + for (const auto &entry : element.as_object()) { + const auto expanded{expand_iri(state, active_context, entry.first, false, + true, nullptr, nullptr, empty_weak_pointer)}; + if (!expanded.has_value() || expanded.value() != KEYWORD_TYPE) { + continue; + } + if (entry.second.is_array()) { + for (const auto &item : entry.second.as_array()) { + if (item.is_string()) { + type_values.push_back(item.to_string()); + } + } + } else if (entry.second.is_string()) { + type_values.push_back(entry.second.to_string()); + } + } + std::ranges::sort(type_values); + for (const auto &type : type_values) { + // Each type-scoped context is resolved against the context that preceded + // type-scoped processing, so one type's context cannot hide another's. 
+ const auto definition{type_context.terms.find(type)}; + if (definition != type_context.terms.cend() && + definition->second.context.has_value()) { + const auto &scoped{definition->second.context.value()}; + const auto saved_base{state.context_base_override}; + state.context_base_override = definition->second.context_base; + process_context(state, active_context, scoped, pointer, false); + state.context_base_override = saved_base; + } + } + + expand_entries(state, active_context, type_context, result, active_property, + element, pointer); + + // An empty reverse map carries no information. + if (const auto *reverse{result.try_at(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)}; + reverse != nullptr && reverse->empty()) { + result.erase(KEYWORD_REVERSE); + } + + // Post-processing (JSON-LD 1.1 API Section 5.1.2) + if (const auto *value_entry{ + result.try_at(KEYWORD_VALUE, KEYWORD_VALUE_HASH)}) { + const JSON *const type{result.try_at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)}; + const bool has_type{type != nullptr}; + const JSON::String *const type_string{ + type != nullptr && type->is_string() ? 
&type->to_string() : nullptr}; + const bool is_json{type_string != nullptr && *type_string == KEYWORD_JSON}; + for (const auto &entry : result.as_object()) { + const auto &name{entry.first}; + if (name != KEYWORD_VALUE && name != KEYWORD_TYPE && + name != KEYWORD_LANGUAGE && name != KEYWORD_INDEX && + name != KEYWORD_DIRECTION) { + throw JSONLDError("Invalid value object", pointer); + } + if ((name == KEYWORD_LANGUAGE || name == KEYWORD_DIRECTION) && has_type) { + throw JSONLDError("Invalid value object", pointer); + } + } + const auto &content{*value_entry}; + if (content.is_null() && !is_json) { + return JSON{nullptr}; + } + if (const auto *language{ + result.try_at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH)}) { + if (!language->is_string()) { + throw JSONLDError("Invalid language-tagged string", pointer); + } + if (!content.is_string()) { + throw JSONLDError("Invalid language-tagged value", pointer); + } + } + if (has_type && (type_string == nullptr || type_string->starts_with("_:") || + type_string->find(' ') != JSON::String::npos)) { + throw JSONLDError("Invalid typed value", pointer); + } + if (!is_json && !content.is_string() && !content.is_number() && + !content.is_boolean()) { + throw JSONLDError("Invalid value object value", pointer); + } + } else if (const auto *type_entry{ + result.try_at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)}; + type_entry != nullptr && !type_entry->is_array()) { + // Node objects always carry @type as an array. + result.assign(KEYWORD_TYPE, into_array(JSON{*type_entry})); + } + + // Drop an incomplete value object that has a language or direction but no + // value. 
+ if (!result.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) && + (result.defines(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) || + result.defines(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH))) { + bool only_value_keys{true}; + for (const auto &entry : result.as_object()) { + const auto &name{entry.first}; + if (name != KEYWORD_LANGUAGE && name != KEYWORD_DIRECTION && + name != KEYWORD_INDEX) { + only_value_keys = false; + } + } + if (only_value_keys) { + return JSON{nullptr}; + } + } + + // Drop free-floating values when not under a property. + if (!active_property.has_value() || + active_property.value() == KEYWORD_GRAPH) { + if (result.empty() || result.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || + result.defines(KEYWORD_LIST, KEYWORD_LIST_HASH) || + (result.object_size() == 1 && + result.defines(KEYWORD_ID, KEYWORD_ID_HASH))) { + return JSON{nullptr}; + } + } + + if (result.defines(KEYWORD_LIST, KEYWORD_LIST_HASH) || + result.defines(KEYWORD_SET, KEYWORD_SET_HASH)) { + for (const auto &entry : result.as_object()) { + const auto &name{entry.first}; + if (name != KEYWORD_LIST && name != KEYWORD_SET && + name != KEYWORD_INDEX) { + throw JSONLDError("Invalid set or list object", pointer); + } + } + } + + // A bare @set collapses to its array. + if (const auto *set{result.try_at(KEYWORD_SET, KEYWORD_SET_HASH)}) { + return *set; + } + + return result; +} + +auto expand_entries(ExpansionState &state, ActiveContext &active_context, + const ActiveContext &type_context, JSON &result, + const std::optional &active_property, + const JSON &source, const WeakPointer &source_pointer) + -> void { + // @nest entries are deferred and processed after the direct ones. The + // property is referenced from the source object, never copied. 
+ std::vector> nests; + for (const auto &[key_pointer, value_pointer] : sorted_entries(source)) { + const std::pair entry{*key_pointer, + *value_pointer}; + const JSON::String &property{entry.first}; + const WeakPointer entry_pointer{source_pointer.concat(property)}; + if (property == KEYWORD_CONTEXT) { + continue; + } + + const auto expanded_property{expand_iri(state, active_context, property, + false, true, nullptr, nullptr, + empty_weak_pointer)}; + + if (expanded_property.has_value() && + expanded_property.value() == KEYWORD_NEST) { + if (entry.second.is_array()) { + for (const auto &nest_value : entry.second.as_array()) { + if (!nest_value.is_object() || + nest_value.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + throw JSONLDError("Invalid @nest value", entry_pointer); + } + nests.emplace_back(&property, &nest_value); + } + } else if (entry.second.is_object() && + !entry.second.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + nests.emplace_back(&property, &entry.second); + } else { + throw JSONLDError("Invalid @nest value", entry_pointer); + } + continue; + } + if (!expanded_property.has_value()) { + continue; + } + + const auto &name{expanded_property.value()}; + if (name.find(':') == JSON::String::npos && !is_keyword(name)) { + continue; + } + + if (is_keyword(name) && name != KEYWORD_TYPE && name != KEYWORD_INCLUDED && + result.defines(name)) { + throw JSONLDError("Colliding keywords", entry_pointer); + } + + if (name == KEYWORD_ID) { + if (!entry.second.is_string()) { + throw JSONLDError("Invalid @id value", entry_pointer); + } + const auto identifier{expand_iri(state, active_context, + entry.second.to_string(), true, false, + nullptr, nullptr, empty_weak_pointer)}; + if (identifier.has_value()) { + result.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{identifier.value()}, KEYWORD_ID_HASH); + } else { + result.assign_assume_new(JSON::String{KEYWORD_ID}, JSON{nullptr}, + KEYWORD_ID_HASH); + } + continue; + } + + if (name == KEYWORD_TYPE) { + if 
(entry.second.is_array()) { + for (const auto &item : entry.second.as_array()) { + if (!item.is_string()) { + throw JSONLDError("Invalid type value", entry_pointer); + } + } + } else if (!entry.second.is_string()) { + throw JSONLDError("Invalid type value", entry_pointer); + } + // Expand each value, preserving whether the input was a string or an + // array. The node-object post-processing later turns a lone string into + // an array, but a value object keeps its @type as a string. + JSON expanded_type{nullptr}; + if (entry.second.is_array()) { + expanded_type = JSON::make_array(); + for (const auto &item : entry.second.as_array()) { + expanded_type.push_back(expand_type(state, type_context, item)); + } + } else { + expanded_type = expand_type(state, type_context, entry.second); + } + if (result.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + auto merged{ + into_array(std::move(result.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)))}; + for (auto &item : into_array(std::move(expanded_type)).as_array()) { + merged.push_back(item); + } + result.assign(KEYWORD_TYPE, std::move(merged)); + } else { + result.assign_assume_new(JSON::String{KEYWORD_TYPE}, + std::move(expanded_type), KEYWORD_TYPE_HASH); + } + continue; + } + + if (name == KEYWORD_VALUE) { + result.assign_assume_new(JSON::String{KEYWORD_VALUE}, JSON{entry.second}, + KEYWORD_VALUE_HASH); + continue; + } + + if (name == KEYWORD_LANGUAGE) { + result.assign_assume_new(JSON::String{KEYWORD_LANGUAGE}, + JSON{entry.second}, KEYWORD_LANGUAGE_HASH); + continue; + } + + if (name == KEYWORD_DIRECTION) { + result.assign_assume_new(JSON::String{KEYWORD_DIRECTION}, + JSON{entry.second}, KEYWORD_DIRECTION_HASH); + continue; + } + + if (name == KEYWORD_LIST || name == KEYWORD_SET) { + auto elements{JSON::make_array()}; + const auto values{into_array(JSON{entry.second})}; + std::size_t value_index{0}; + for (const auto &item : values.as_array()) { + const WeakPointer item_pointer{entry.second.is_array() + ? 
entry_pointer.concat(value_index) + : entry_pointer}; + auto expanded_item{ + expand(state, active_context, active_property, item, item_pointer)}; + if (expanded_item.is_array()) { + for (auto &nested : expanded_item.as_array()) { + elements.push_back(nested); + } + } else if (!expanded_item.is_null()) { + elements.push_back(std::move(expanded_item)); + } + value_index += 1; + } + if (name == KEYWORD_LIST && state.processing_1_0) { + for (const auto &item : elements.as_array()) { + if (item.is_object() && + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + throw JSONLDError("List of lists", entry_pointer); + } + } + } + result.assign(name, std::move(elements)); + continue; + } + + if (name == KEYWORD_GRAPH) { + merge( + result, KEYWORD_GRAPH, + into_array(expand(state, active_context, JSON::String{KEYWORD_GRAPH}, + entry.second, entry_pointer))); + continue; + } + + if (name == KEYWORD_INCLUDED) { + auto included{into_array(expand(state, active_context, std::nullopt, + entry.second, entry_pointer))}; + for (const auto &item : included.as_array()) { + if (!item.is_object() || + item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH) || + item.defines(KEYWORD_SET, KEYWORD_SET_HASH)) { + throw JSONLDError("Invalid @included value", entry_pointer); + } + } + merge(result, KEYWORD_INCLUDED, std::move(included)); + continue; + } + + if (name == KEYWORD_INDEX) { + if (!entry.second.is_string()) { + throw JSONLDError("Invalid @index value", entry_pointer); + } + result.assign_assume_new(JSON::String{KEYWORD_INDEX}, JSON{entry.second}, + KEYWORD_INDEX_HASH); + continue; + } + + if (name == KEYWORD_REVERSE) { + if (!entry.second.is_object()) { + throw JSONLDError("Invalid @reverse value", entry_pointer); + } + auto reversed{expand(state, active_context, JSON::String{KEYWORD_REVERSE}, + entry.second, entry_pointer)}; + if (reversed.is_object()) { + const auto *existing_reverse{ + result.try_at(KEYWORD_REVERSE, 
KEYWORD_REVERSE_HASH)}; + auto reverse_map{existing_reverse != nullptr ? *existing_reverse + : JSON::make_object()}; + for (const auto &reverse_entry : reversed.as_object()) { + const auto &reverse_property{reverse_entry.first}; + if (reverse_entry.hash == KEYWORD_REVERSE_HASH) { + for (auto &forward : reverse_entry.second.as_object()) { + merge(result, JSON::StringView{forward.first}, + into_array(JSON{forward.second})); + } + } else if (is_keyword(reverse_property, reverse_entry.hash)) { + throw JSONLDError("Invalid reverse property map", entry_pointer); + } else { + for (const auto &item : + into_array(JSON{reverse_entry.second}).as_array()) { + if (item.is_object() && + (item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH))) { + throw JSONLDError("Invalid reverse property value", + entry_pointer); + } + } + merge(reverse_map, reverse_property, + into_array(JSON{reverse_entry.second})); + } + } + result.assign(KEYWORD_REVERSE, std::move(reverse_map)); + } + continue; + } + + if (is_keyword(name)) { + // TODO: @direction, @json, @nest + continue; + } + + const TermDefinition *definition{nullptr}; + const auto term{active_context.terms.find(property)}; + if (term != active_context.terms.cend()) { + definition = &term->second; + } + + // Property-scoped context (JSON-LD 1.1 API Section 5.1.2 step 13.3) + ActiveContext scoped_context; + const bool scoped{definition != nullptr && definition->context.has_value()}; + if (scoped) { + scoped_context = active_context; + // A property-scoped context propagates by default, so it does not + // inherit an enclosing type-scoped revert. It may, however, set its + // own revert when it specifies @propagate: false. 
+ scoped_context.previous = nullptr; + state.protected_override = true; + const auto saved_base{state.context_base_override}; + state.context_base_override = definition->context_base; + process_context(state, scoped_context, definition->context.value(), + entry_pointer); + state.context_base_override = saved_base; + state.protected_override = false; + } + ActiveContext &value_context{scoped ? scoped_context : active_context}; + + // A term coerced to @json keeps its value verbatim. + if (definition != nullptr && definition->type_mapping.has_value() && + definition->type_mapping.value() == KEYWORD_JSON) { + auto json_value{JSON::make_object()}; + json_value.assign_assume_new(JSON::String{KEYWORD_VALUE}, + JSON{entry.second}, KEYWORD_VALUE_HASH); + json_value.assign_assume_new(JSON::String{KEYWORD_TYPE}, + JSON{KEYWORD_JSON}, KEYWORD_TYPE_HASH); + merge(result, name, into_array(std::move(json_value))); + continue; + } + + JSON expanded_value{nullptr}; + if (entry.second.is_object() && + container_includes(definition, KEYWORD_GRAPH) && + (container_includes(definition, KEYWORD_ID) || + container_includes(definition, KEYWORD_INDEX))) { + const bool by_id{container_includes(definition, KEYWORD_ID)}; + const bool property_valued{definition->index.has_value() && + definition->index.value() != KEYWORD_INDEX}; + std::optional index_property; + if (property_valued) { + index_property = + expand_iri(state, value_context, definition->index.value(), false, + true, nullptr, nullptr, empty_weak_pointer); + } + expanded_value = JSON::make_array(); + for (const auto &[graph_key, graph_value] : + sorted_entries(entry.second)) { + const JSON::String &index{*graph_key}; + const auto expanded_key{index == KEYWORD_NONE + ? 
std::optional{KEYWORD_NONE} + : expand_iri(state, value_context, index, + true, false, nullptr, nullptr, + empty_weak_pointer)}; + const bool none_key{expanded_key.has_value() && + expanded_key.value() == KEYWORD_NONE}; + for (auto &item : + into_array(expand(state, value_context, property, *graph_value, + entry_pointer.concat(index))) + .as_array()) { + // Wrap the item in a graph object, unless it is already one. + JSON graph{nullptr}; + if (item.is_object() && + item.defines(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH)) { + graph = std::move(item); + } else { + graph = JSON::make_object(); + graph.assign_assume_new(JSON::String{KEYWORD_GRAPH}, + into_array(std::move(item)), + KEYWORD_GRAPH_HASH); + } + if (!none_key) { + if (by_id) { + if (!graph.defines(KEYWORD_ID, KEYWORD_ID_HASH)) { + graph.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{expanded_key.value_or(index)}, + KEYWORD_ID_HASH); + } + } else if (property_valued) { + auto combined{into_array(expand_value( + state, value_context, definition->index, JSON{index}))}; + if (graph.defines(index_property.value())) { + for (auto &existing : + graph.at(index_property.value()).as_array()) { + combined.push_back(existing); + } + } + graph.assign(index_property.value(), std::move(combined)); + } else if (!graph.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + graph.assign_assume_new(JSON::String{KEYWORD_INDEX}, JSON{index}, + KEYWORD_INDEX_HASH); + } + } + expanded_value.push_back(std::move(graph)); + } + } + } else if (entry.second.is_object() && + container_includes(definition, KEYWORD_LANGUAGE)) { + expanded_value = JSON::make_array(); + for (const auto &[language_key, language_value] : + sorted_entries(entry.second)) { + const JSON::String &language{*language_key}; + const auto expanded_language{expand_iri(state, value_context, language, + false, true, nullptr, nullptr, + empty_weak_pointer)}; + const bool is_none{language == KEYWORD_NONE || + (expanded_language.has_value() && + expanded_language.value() == 
KEYWORD_NONE)}; + for (auto &item : into_array(JSON{*language_value}).as_array()) { + if (item.is_null()) { + continue; + } + if (!item.is_string()) { + throw JSONLDError("Invalid language map value", + entry_pointer.concat(language)); + } + auto value{JSON::make_object()}; + value.assign_assume_new(JSON::String{KEYWORD_VALUE}, JSON{item}, + KEYWORD_VALUE_HASH); + if (!is_none) { + value.assign_assume_new(JSON::String{KEYWORD_LANGUAGE}, + JSON{language}, KEYWORD_LANGUAGE_HASH); + } + const auto direction{definition->has_direction + ? definition->direction + : active_context.default_direction}; + if (direction.has_value()) { + value.assign_assume_new(JSON::String{KEYWORD_DIRECTION}, + JSON{direction.value()}, + KEYWORD_DIRECTION_HASH); + } + expanded_value.push_back(std::move(value)); + } + } + } else if (entry.second.is_object() && + container_includes(definition, KEYWORD_INDEX)) { + const bool property_valued{definition->index.has_value() && + definition->index.value() != KEYWORD_INDEX}; + std::optional index_property; + if (property_valued) { + index_property = + expand_iri(state, value_context, definition->index.value(), false, + true, nullptr, nullptr, empty_weak_pointer); + } + expanded_value = JSON::make_array(); + for (const auto &[index_key, index_value] : + sorted_entries(entry.second)) { + const JSON::String &index{*index_key}; + for (auto &item : + into_array(expand(state, value_context, property, *index_value, + entry_pointer.concat(index))) + .as_array()) { + if (index != KEYWORD_NONE) { + if (property_valued) { + if (item.is_object() && + item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + throw JSONLDError("Invalid value object", + entry_pointer.concat(index)); + } + // The index value is prepended to any existing values. 
+ auto combined{into_array(expand_value( + state, value_context, definition->index, JSON{index}))}; + if (item.defines(index_property.value())) { + for (auto &existing : + item.at(index_property.value()).as_array()) { + combined.push_back(existing); + } + } + item.assign(index_property.value(), std::move(combined)); + } else if (!item.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + item.assign_assume_new(JSON::String{KEYWORD_INDEX}, JSON{index}, + KEYWORD_INDEX_HASH); + } + } + expanded_value.push_back(item); + } + } + } else if (entry.second.is_object() && + (container_includes(definition, KEYWORD_ID) || + container_includes(definition, KEYWORD_TYPE))) { + const bool by_id{container_includes(definition, KEYWORD_ID)}; + expanded_value = JSON::make_array(); + for (const auto &[map_key, map_value] : sorted_entries(entry.second)) { + const JSON::String &index{*map_key}; + std::optional expanded_index; + if (index != KEYWORD_NONE) { + expanded_index = + expand_iri(state, value_context, index, by_id, !by_id, nullptr, + nullptr, empty_weak_pointer); + } + // The key may be an alias of @none, which carries no identifier. + if (expanded_index.has_value() && + expanded_index.value() == KEYWORD_NONE) { + expanded_index = std::nullopt; + } + // A type map key may carry a type-scoped context for its values. + // Type-scoped contexts do not propagate, so the values are resolved + // against the context that preceded the containing type-scoped + // context, with only this key's context layered on top. + ActiveContext entry_context{value_context.previous && !by_id + ? 
*value_context.previous + : value_context}; + if (!by_id) { + const auto type_definition{entry_context.terms.find(index)}; + if (type_definition != entry_context.terms.cend() && + type_definition->second.context.has_value()) { + const auto saved_base{state.context_base_override}; + state.context_base_override = type_definition->second.context_base; + process_context(state, entry_context, + type_definition->second.context.value(), + entry_pointer.concat(index)); + state.context_base_override = saved_base; + entry_context.previous = nullptr; + } + } + // String values in a type map are node references. + auto entries{JSON::make_array()}; + for (auto &raw : into_array(JSON{*map_value}).as_array()) { + if (raw.is_string() && !by_id) { + auto reference{JSON::make_object()}; + const bool reference_vocab{definition->type_mapping.has_value() && + definition->type_mapping.value() == + KEYWORD_VOCAB}; + const auto &raw_string{raw.to_string()}; + const auto referenced{expand_iri(state, value_context, raw_string, + true, reference_vocab, nullptr, + nullptr, empty_weak_pointer)}; + reference.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{referenced.value_or(raw_string)}, + KEYWORD_ID_HASH); + entries.push_back(std::move(reference)); + } else { + for (auto &expanded : + into_array(expand(state, entry_context, property, raw, + entry_pointer.concat(index))) + .as_array()) { + entries.push_back(expanded); + } + } + } + for (auto &item : entries.as_array()) { + if (expanded_index.has_value()) { + if (by_id) { + if (!item.defines(KEYWORD_ID, KEYWORD_ID_HASH)) { + item.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{expanded_index.value()}, + KEYWORD_ID_HASH); + } + } else { + auto types{JSON::make_array()}; + types.push_back(JSON{expanded_index.value()}); + if (item.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + for (auto &existing : + into_array( + std::move(item.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH))) + .as_array()) { + types.push_back(existing); + } + } + 
item.assign(KEYWORD_TYPE, std::move(types)); + } + } + expanded_value.push_back(item); + } + } + } else if (container_includes(definition, KEYWORD_GRAPH)) { + expanded_value = JSON::make_array(); + for (auto &item : into_array(expand(state, value_context, property, + entry.second, entry_pointer)) + .as_array()) { + auto graph{JSON::make_object()}; + graph.assign_assume_new(JSON::String{KEYWORD_GRAPH}, + into_array(std::move(item)), + KEYWORD_GRAPH_HASH); + expanded_value.push_back(std::move(graph)); + } + } else { + if (scoped && value_context.previous && entry.second.is_object() && + !entry.second.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + // A non-propagating property-scoped context applies to the immediate + // node; nested nodes revert to the previous context. + expanded_value = expand_object(state, value_context, property, + entry.second, entry_pointer); + } else { + expanded_value = + expand(state, value_context, property, entry.second, entry_pointer); + } + if (container_includes(definition, KEYWORD_LIST) && + !expanded_value.is_null() && + !(expanded_value.is_object() && + expanded_value.defines(KEYWORD_LIST, KEYWORD_LIST_HASH))) { + expanded_value = into_array(std::move(expanded_value)); + if (state.processing_1_0) { + for (const auto &item : expanded_value.as_array()) { + if (item.is_object() && + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + throw JSONLDError("List of lists", entry_pointer); + } + } + } + auto wrapper{JSON::make_object()}; + wrapper.assign_assume_new(JSON::String{KEYWORD_LIST}, + std::move(expanded_value), KEYWORD_LIST_HASH); + expanded_value = std::move(wrapper); + } + } + + if (expanded_value.is_null()) { + continue; + } + + if (definition != nullptr && definition->reverse) { + for (const auto &item : into_array(JSON{expanded_value}).as_array()) { + if (item.is_object() && + (item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH))) { + throw JSONLDError("Invalid reverse property 
value", entry_pointer); + } + } + const auto *existing_reverse{ + result.try_at(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)}; + auto reverse_map{existing_reverse != nullptr ? *existing_reverse + : JSON::make_object()}; + merge(reverse_map, name, into_array(std::move(expanded_value))); + result.assign(KEYWORD_REVERSE, std::move(reverse_map)); + continue; + } + + merge(result, name, into_array(std::move(expanded_value))); + } + for (const auto &[nest_property, nest] : nests) { + // A @nest alias term may carry a property-scoped context for the nested + // entries. + const WeakPointer nest_pointer{source_pointer.concat(*nest_property)}; + const auto definition{active_context.terms.find(*nest_property)}; + if (definition != active_context.terms.cend() && + definition->second.context.has_value()) { + const ActiveContext saved{active_context}; + const auto saved_base{state.context_base_override}; + state.context_base_override = definition->second.context_base; + state.protected_override = true; + process_context(state, active_context, definition->second.context.value(), + nest_pointer); + state.protected_override = false; + state.context_base_override = saved_base; + active_context.previous = nullptr; + expand_entries(state, active_context, type_context, result, + active_property, *nest, nest_pointer); + active_context = saved; + } else { + expand_entries(state, active_context, type_context, result, + active_property, *nest, nest_pointer); + } + } +} + +} // namespace + +// Expansion (JSON-LD 1.1 API Section 5.1.2) +auto expand(ExpansionState &state, ActiveContext &active_context, + const std::optional &active_property, + const JSON &element, const WeakPointer &pointer) -> JSON { + if (element.is_null()) { + return JSON{nullptr}; + } + + if (!element.is_object() && !element.is_array()) { + if (!active_property.has_value() || + active_property.value() == KEYWORD_GRAPH) { + return JSON{nullptr}; + } + return expand_value(state, active_context, active_property, element); + } + + 
if (element.is_array()) { + const TermDefinition *definition{nullptr}; + if (active_property.has_value()) { + const auto term{active_context.terms.find(active_property.value())}; + if (term != active_context.terms.cend()) { + definition = &term->second; + } + } + + auto result{JSON::make_array()}; + std::size_t item_index{0}; + for (const auto &item : element.as_array()) { + auto expanded{expand(state, active_context, active_property, item, + pointer.concat(item_index))}; + if (expanded.is_array()) { + for (auto &nested : expanded.as_array()) { + result.push_back(nested); + } + } else if (!expanded.is_null()) { + result.push_back(std::move(expanded)); + } + item_index += 1; + } + + if (container_includes(definition, KEYWORD_LIST)) { + if (state.processing_1_0) { + for (const auto &item : result.as_array()) { + if (item.is_object() && + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + throw JSONLDError("List of lists", pointer); + } + } + } + auto wrapper{JSON::make_object()}; + wrapper.assign_assume_new(JSON::String{KEYWORD_LIST}, std::move(result), + KEYWORD_LIST_HASH); + return wrapper; + } + + return result; + } + + // Revert a non-propagating (type-scoped) context when descending into a node + // object that is neither a value object nor an @id-only reference. 
+ ActiveContext reverted; + ActiveContext *current{&active_context}; + if (active_context.previous) { + bool value_or_id{false}; + const bool single{element.object_size() == 1}; + for (const auto &entry : element.as_object()) { + const auto expanded{expand_iri(state, active_context, entry.first, false, + true, nullptr, nullptr, + empty_weak_pointer)}; + if (expanded.has_value() && + (expanded.value() == KEYWORD_VALUE || + (single && expanded.value() == KEYWORD_ID))) { + value_or_id = true; + break; + } + } + if (!value_or_id) { + reverted = *active_context.previous; + current = &reverted; + } + } + + if (element.defines(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH)) { + ActiveContext local{*current}; + process_context(state, local, + element.at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH), + pointer.concat(keyword_context())); + return expand_object(state, local, active_property, element, pointer); + } + + return expand_object(state, *current, active_property, element, pointer); +} + +} // namespace sourcemeta::core diff --git a/src/core/jsonld/jsonld_iri_expansion.cc b/src/core/jsonld/jsonld_iri_expansion.cc new file mode 100644 index 000000000..04d99edbb --- /dev/null +++ b/src/core/jsonld/jsonld_iri_expansion.cc @@ -0,0 +1,86 @@ +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include + +#include // std::optional + +namespace sourcemeta::core { + +// IRI Expansion (JSON-LD 1.1 API Section 5.2) +auto expand_iri(ExpansionState &state, ActiveContext &active_context, + const JSON::String &value, const bool document_relative, + const bool vocabulary, const JSON *const local_context, + DefinedTerms *const defined, const WeakPointer &context_pointer) + -> std::optional { + if (is_keyword(value)) { + return value; + } + + if (has_keyword_form(value)) { + return std::nullopt; + } + + if (local_context != nullptr && local_context->is_object() && + local_context->defines(value)) { + const auto iterator{defined->find(value)}; + if (iterator == defined->cend() || 
!iterator->second) { + create_term_definition(state, active_context, *local_context, value, + *defined, context_pointer); + } + } + + const auto term{active_context.terms.find(value)}; + if (term != active_context.terms.cend() && term->second.iri.has_value() && + is_keyword(term->second.iri.value())) { + return term->second.iri; + } + + if (vocabulary && term != active_context.terms.cend()) { + return term->second.iri; + } + + const auto colon{value.find(':', 1)}; + if (colon != JSON::String::npos) { + const auto prefix{value.substr(0, colon)}; + const auto suffix{value.substr(colon + 1)}; + if (prefix == "_" || suffix.starts_with("//")) { + return value; + } + + if (local_context != nullptr && local_context->is_object() && + local_context->defines(prefix)) { + const auto iterator{defined->find(prefix)}; + if (iterator == defined->cend() || !iterator->second) { + create_term_definition(state, active_context, *local_context, prefix, + *defined, context_pointer); + } + } + + const auto definition{active_context.terms.find(prefix)}; + if (definition != active_context.terms.cend() && + definition->second.iri.has_value() && definition->second.prefix) { + return definition->second.iri.value() + suffix; + } + + // The value is only already an IRI when its prefix is a valid scheme. + // Otherwise it is resolved against the vocabulary or document base below. 
+ if (URI::is_scheme(prefix)) { + return value; + } + } + + if (vocabulary && active_context.vocabulary.has_value()) { + return active_context.vocabulary.value() + value; + } + + if (document_relative && active_context.base.has_value()) { + return URI::from_iri(value) + .resolve_from(URI::from_iri(active_context.base.value())) + .recompose(); + } + + return value; +} + +} // namespace sourcemeta::core diff --git a/src/core/jsonld/jsonld_keywords.h b/src/core/jsonld/jsonld_keywords.h new file mode 100644 index 000000000..ab16baea5 --- /dev/null +++ b/src/core/jsonld/jsonld_keywords.h @@ -0,0 +1,124 @@ +#ifndef SOURCEMETA_CORE_JSONLD_KEYWORDS_H_ +#define SOURCEMETA_CORE_JSONLD_KEYWORDS_H_ + +#include +#include + +#include // std::size_t + +namespace sourcemeta::core { + +// The JSON-LD 1.1 keywords (https://www.w3.org/TR/json-ld11/#keywords). +inline constexpr JSON::StringView KEYWORD_BASE{"@base"}; +inline constexpr JSON::StringView KEYWORD_CONTAINER{"@container"}; +inline constexpr JSON::StringView KEYWORD_CONTEXT{"@context"}; +inline constexpr JSON::StringView KEYWORD_DIRECTION{"@direction"}; +inline constexpr JSON::StringView KEYWORD_GRAPH{"@graph"}; +inline constexpr JSON::StringView KEYWORD_ID{"@id"}; +inline constexpr JSON::StringView KEYWORD_IMPORT{"@import"}; +inline constexpr JSON::StringView KEYWORD_INCLUDED{"@included"}; +inline constexpr JSON::StringView KEYWORD_INDEX{"@index"}; +inline constexpr JSON::StringView KEYWORD_JSON{"@json"}; +inline constexpr JSON::StringView KEYWORD_LANGUAGE{"@language"}; +inline constexpr JSON::StringView KEYWORD_LIST{"@list"}; +inline constexpr JSON::StringView KEYWORD_NEST{"@nest"}; +inline constexpr JSON::StringView KEYWORD_NONE{"@none"}; +inline constexpr JSON::StringView KEYWORD_PREFIX{"@prefix"}; +inline constexpr JSON::StringView KEYWORD_PROPAGATE{"@propagate"}; +inline constexpr JSON::StringView KEYWORD_PROTECTED{"@protected"}; +inline constexpr JSON::StringView KEYWORD_REVERSE{"@reverse"}; +inline constexpr 
JSON::StringView KEYWORD_SET{"@set"}; +inline constexpr JSON::StringView KEYWORD_TYPE{"@type"}; +inline constexpr JSON::StringView KEYWORD_VALUE{"@value"}; +inline constexpr JSON::StringView KEYWORD_VERSION{"@version"}; +inline constexpr JSON::StringView KEYWORD_VOCAB{"@vocab"}; + +// Precomputed object-key hashes for each keyword, so that object access never +// has to recompute them. +inline const auto KEYWORD_BASE_HASH{JSON::Object::hash(KEYWORD_BASE)}; +inline const auto KEYWORD_CONTAINER_HASH{JSON::Object::hash(KEYWORD_CONTAINER)}; +inline const auto KEYWORD_CONTEXT_HASH{JSON::Object::hash(KEYWORD_CONTEXT)}; +inline const auto KEYWORD_DIRECTION_HASH{JSON::Object::hash(KEYWORD_DIRECTION)}; +inline const auto KEYWORD_GRAPH_HASH{JSON::Object::hash(KEYWORD_GRAPH)}; +inline const auto KEYWORD_ID_HASH{JSON::Object::hash(KEYWORD_ID)}; +inline const auto KEYWORD_IMPORT_HASH{JSON::Object::hash(KEYWORD_IMPORT)}; +inline const auto KEYWORD_INCLUDED_HASH{JSON::Object::hash(KEYWORD_INCLUDED)}; +inline const auto KEYWORD_INDEX_HASH{JSON::Object::hash(KEYWORD_INDEX)}; +inline const auto KEYWORD_JSON_HASH{JSON::Object::hash(KEYWORD_JSON)}; +inline const auto KEYWORD_LANGUAGE_HASH{JSON::Object::hash(KEYWORD_LANGUAGE)}; +inline const auto KEYWORD_LIST_HASH{JSON::Object::hash(KEYWORD_LIST)}; +inline const auto KEYWORD_NEST_HASH{JSON::Object::hash(KEYWORD_NEST)}; +inline const auto KEYWORD_NONE_HASH{JSON::Object::hash(KEYWORD_NONE)}; +inline const auto KEYWORD_PREFIX_HASH{JSON::Object::hash(KEYWORD_PREFIX)}; +inline const auto KEYWORD_PROPAGATE_HASH{JSON::Object::hash(KEYWORD_PROPAGATE)}; +inline const auto KEYWORD_PROTECTED_HASH{JSON::Object::hash(KEYWORD_PROTECTED)}; +inline const auto KEYWORD_REVERSE_HASH{JSON::Object::hash(KEYWORD_REVERSE)}; +inline const auto KEYWORD_SET_HASH{JSON::Object::hash(KEYWORD_SET)}; +inline const auto KEYWORD_TYPE_HASH{JSON::Object::hash(KEYWORD_TYPE)}; +inline const auto KEYWORD_VALUE_HASH{JSON::Object::hash(KEYWORD_VALUE)}; +inline const auto 
KEYWORD_VERSION_HASH{JSON::Object::hash(KEYWORD_VERSION)}; +inline const auto KEYWORD_VOCAB_HASH{JSON::Object::hash(KEYWORD_VOCAB)}; + +// A stable owned copy of the @context keyword, suitable as a JSON Pointer +// token. (A namespace-scope JSON::String constant would trip clang-tidy's +// throwing-static-initialization check, hence the function-local static.) +inline auto keyword_context() -> const JSON::String & { + static const JSON::String value{KEYWORD_CONTEXT}; + return value; +} + +inline auto is_keyword(const JSON::StringView value) -> bool { + if (value.size() < 2 || value.front() != '@') { + return false; + } + return value == KEYWORD_BASE || value == KEYWORD_CONTAINER || + value == KEYWORD_CONTEXT || value == KEYWORD_DIRECTION || + value == KEYWORD_GRAPH || value == KEYWORD_ID || + value == KEYWORD_IMPORT || value == KEYWORD_INCLUDED || + value == KEYWORD_INDEX || value == KEYWORD_JSON || + value == KEYWORD_LANGUAGE || value == KEYWORD_LIST || + value == KEYWORD_NEST || value == KEYWORD_NONE || + value == KEYWORD_PREFIX || value == KEYWORD_PROPAGATE || + value == KEYWORD_PROTECTED || value == KEYWORD_REVERSE || + value == KEYWORD_SET || value == KEYWORD_TYPE || + value == KEYWORD_VALUE || value == KEYWORD_VERSION || + value == KEYWORD_VOCAB; +} + +inline auto is_keyword(const JSON::StringView value, + const JSON::Object::hash_type hash) -> bool { + if (value.size() < 2 || value.front() != '@') { + return false; + } + return hash == KEYWORD_BASE_HASH || hash == KEYWORD_CONTAINER_HASH || + hash == KEYWORD_CONTEXT_HASH || hash == KEYWORD_DIRECTION_HASH || + hash == KEYWORD_GRAPH_HASH || hash == KEYWORD_ID_HASH || + hash == KEYWORD_IMPORT_HASH || hash == KEYWORD_INCLUDED_HASH || + hash == KEYWORD_INDEX_HASH || hash == KEYWORD_JSON_HASH || + hash == KEYWORD_LANGUAGE_HASH || hash == KEYWORD_LIST_HASH || + hash == KEYWORD_NEST_HASH || hash == KEYWORD_NONE_HASH || + hash == KEYWORD_PREFIX_HASH || hash == KEYWORD_PROPAGATE_HASH || + hash == KEYWORD_PROTECTED_HASH 
|| hash == KEYWORD_REVERSE_HASH || + hash == KEYWORD_SET_HASH || hash == KEYWORD_TYPE_HASH || + hash == KEYWORD_VALUE_HASH || hash == KEYWORD_VERSION_HASH || + hash == KEYWORD_VOCAB_HASH; +} + +// Whether the given value has the generic form of a keyword (an `@` followed by +// one or more letters), which the algorithms treat as a reserved token even +// when it is not a defined keyword. +inline auto has_keyword_form(const JSON::StringView value) -> bool { + if (value.size() < 2 || value.front() != '@') { + return false; + } + for (std::size_t index{1}; index < value.size(); index += 1) { + if (!is_alpha(value[index])) { + return false; + } + } + return true; +} + +} // namespace sourcemeta::core + +#endif diff --git a/src/core/jsonld/jsonld_value_expansion.cc b/src/core/jsonld/jsonld_value_expansion.cc new file mode 100644 index 000000000..6c3b9b29e --- /dev/null +++ b/src/core/jsonld/jsonld_value_expansion.cc @@ -0,0 +1,73 @@ +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include // std::optional + +namespace sourcemeta::core { + +// Value Expansion (JSON-LD 1.1 API Section 5.3.3) +auto expand_value(ExpansionState &state, ActiveContext &active_context, + const std::optional &active_property, + const JSON &value) -> JSON { + const TermDefinition *definition{nullptr}; + if (active_property.has_value()) { + const auto iterator{active_context.terms.find(active_property.value())}; + if (iterator != active_context.terms.cend()) { + definition = &iterator->second; + } + } + + if (definition != nullptr && definition->type_mapping.has_value() && + value.is_string()) { + if (definition->type_mapping.value() == KEYWORD_ID) { + auto result{JSON::make_object()}; + const auto identifier{expand_iri(state, active_context, value.to_string(), + true, false, nullptr, nullptr, + empty_weak_pointer)}; + result.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{identifier.value_or("")}, KEYWORD_ID_HASH); + return result; + } + if (definition->type_mapping.value() == 
KEYWORD_VOCAB) { + auto result{JSON::make_object()}; + const auto identifier{expand_iri(state, active_context, value.to_string(), + true, true, nullptr, nullptr, + empty_weak_pointer)}; + result.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{identifier.value_or("")}, KEYWORD_ID_HASH); + return result; + } + } + + auto result{JSON::make_object()}; + result.assign_assume_new(JSON::String{KEYWORD_VALUE}, JSON{value}, + KEYWORD_VALUE_HASH); + + if (definition != nullptr && definition->type_mapping.has_value() && + definition->type_mapping.value() != KEYWORD_ID && + definition->type_mapping.value() != KEYWORD_VOCAB && + definition->type_mapping.value() != KEYWORD_NONE) { + result.assign_assume_new(JSON::String{KEYWORD_TYPE}, + JSON{definition->type_mapping.value()}, + KEYWORD_TYPE_HASH); + } else if (value.is_string()) { + const auto language{definition != nullptr && definition->has_language + ? definition->language + : active_context.default_language}; + if (language.has_value()) { + result.assign_assume_new(JSON::String{KEYWORD_LANGUAGE}, + JSON{language.value()}, KEYWORD_LANGUAGE_HASH); + } + const auto direction{definition != nullptr && definition->has_direction + ? 
definition->direction + : active_context.default_direction}; + if (direction.has_value()) { + result.assign_assume_new(JSON::String{KEYWORD_DIRECTION}, + JSON{direction.value()}, KEYWORD_DIRECTION_HASH); + } + } + + return result; +} + +} // namespace sourcemeta::core diff --git a/test/jsonld/CMakeLists.txt b/test/jsonld/CMakeLists.txt index 6be876d51..afb7c49d3 100644 --- a/test/jsonld/CMakeLists.txt +++ b/test/jsonld/CMakeLists.txt @@ -1,10 +1,12 @@ sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME jsonld - SOURCES jsonld_expand_test.cc) + SOURCES jsonld_expand_test.cc jsonld_expand_error_test.cc) target_link_libraries(sourcemeta_core_jsonld_unit PRIVATE sourcemeta::core::jsonld) target_link_libraries(sourcemeta_core_jsonld_unit PRIVATE sourcemeta::core::json) +target_link_libraries(sourcemeta_core_jsonld_unit + PRIVATE sourcemeta::core::jsonpointer) # W3C JSON-LD Test Suite # See https://github.com/w3c/json-ld-api diff --git a/test/jsonld/jsonld_expand_error_test.cc b/test/jsonld/jsonld_expand_error_test.cc new file mode 100644 index 000000000..b30067049 --- /dev/null +++ b/test/jsonld/jsonld_expand_error_test.cc @@ -0,0 +1,430 @@ +#include + +#include +#include +#include + +#include // std::optional, std::nullopt + +#define EXPECT_JSONLD_EXPAND_ERROR(expression, expected_code, \ + expected_pointer) \ + try { \ + [[maybe_unused]] const auto result{expression}; \ + FAIL() << "Expected JSON-LD error: " << (expected_code); \ + } catch (const sourcemeta::core::JSONLDError &error) { \ + EXPECT_STREQ(error.what(), (expected_code)); \ + EXPECT_EQ(sourcemeta::core::to_string(error.pointer()), \ + (expected_pointer)); \ + } catch (...) 
{ \ + FAIL() << "Expected a JSONLDError: " << (expected_code); \ + } + +namespace { + +auto remote_resolver() -> sourcemeta::core::JSONLDResolver { + return [](const sourcemeta::core::JSON::StringView identifier) + -> std::optional { + if (identifier == "https://example.com/recursive") { + return sourcemeta::core::parse_json( + R"({ "@context": "https://example.com/recursive" })"); + } + if (identifier == "https://example.com/no-context") { + return sourcemeta::core::parse_json(R"({ "foo": "bar" })"); + } + return std::nullopt; + }; +} + +} // namespace + +TEST(JSONLD_expand_error, cyclic_iri_mapping) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "term": { "@id": "term:term" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Cyclic IRI mapping", "/@context/term"); +} + +TEST(JSONLD_expand_error, invalid_term_definition_empty) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "": "http://example.com/" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid term definition", "/@context"); +} + +TEST(JSONLD_expand_error, keyword_redefinition) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "@type": "http://example.com/" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Keyword redefinition", "/@context/@type"); +} + +TEST(JSONLD_expand_error, protected_term_redefinition) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": [ + { "@protected": true, "a": "http://example.com/a" }, + { "a": "http://example.com/b" } + ] + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Protected term redefinition", "/@context/1/a"); +} + +TEST(JSONLD_expand_error, invalid_protected_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@protected": "yes" } } + })"); + + 
EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @protected value", + "/@context/a/@protected"); +} + +TEST(JSONLD_expand_error, invalid_iri_mapping) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": true } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid IRI mapping", "/@context/a/@id"); +} + +TEST(JSONLD_expand_error, invalid_keyword_alias) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "@context" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid keyword alias", "/@context/a/@id"); +} + +TEST(JSONLD_expand_error, invalid_reverse_property) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "a": { "@reverse": "http://example.com/a", "@id": "http://example.com/b" } + } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid reverse property", + "/@context/a/@reverse"); +} + +TEST(JSONLD_expand_error, invalid_type_mapping) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@type": true } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid type mapping", "/@context/a/@type"); +} + +TEST(JSONLD_expand_error, invalid_container_mapping) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@container": "@unknown" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid container mapping", + "/@context/a/@container"); +} + +TEST(JSONLD_expand_error, invalid_language_mapping) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@language": true } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid language mapping", + 
"/@context/a/@language"); +} + +TEST(JSONLD_expand_error, invalid_prefix_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@prefix": "yes" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @prefix value", "/@context/a/@prefix"); +} + +TEST(JSONLD_expand_error, invalid_nest_value_term) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@nest": "@id" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @nest value", "/@context/a/@nest"); +} + +TEST(JSONLD_expand_error, invalid_scoped_context) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "a": { + "@id": "http://example.com/a", + "@context": { "b": { "@id": true } } + } + } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid scoped context", "/@context/a/@context"); +} + +TEST(JSONLD_expand_error, invalid_local_context) { + const auto input = sourcemeta::core::parse_json(R"({ "@context": true })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid local context", "/@context"); +} + +TEST(JSONLD_expand_error, invalid_version_value) { + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@version": 2.0 } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @version value", "/@context/@version"); +} + +TEST(JSONLD_expand_error, invalid_propagate_value) { + const auto input = sourcemeta::core::parse_json( + R"({ "@context": { "@propagate": "yes" } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @propagate value", + "/@context/@propagate"); +} + +TEST(JSONLD_expand_error, invalid_import_value) { + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@import": true } })"); + + 
EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @import value", "/@context/@import"); +} + +TEST(JSONLD_expand_error, invalid_base_iri) { + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@base": true } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid base IRI", "/@context/@base"); +} + +TEST(JSONLD_expand_error, invalid_vocab_mapping) { + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@vocab": true } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid vocab mapping", "/@context/@vocab"); +} + +TEST(JSONLD_expand_error, invalid_default_language) { + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@language": true } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid default language", "/@context/@language"); +} + +TEST(JSONLD_expand_error, invalid_base_direction) { + const auto input = sourcemeta::core::parse_json( + R"({ "@context": { "@direction": "sideways" } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid base direction", "/@context/@direction"); +} + +TEST(JSONLD_expand_error, processing_mode_conflict) { + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@version": 1.1 } })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + sourcemeta::core::JSONLDVersion::V1_0), + "Processing mode conflict", "/@context/@version"); +} + +TEST(JSONLD_expand_error, invalid_context_entry) { + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@protected": true } })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + sourcemeta::core::JSONLDVersion::V1_0), + "Invalid context entry", "/@context"); +} + +TEST(JSONLD_expand_error, loading_remote_context_failed) { + const auto input = sourcemeta::core::parse_json( + R"({ "@context": 
"https://example.com/missing" })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", remote_resolver()), + "Loading remote context failed", "/@context"); +} + +TEST(JSONLD_expand_error, invalid_remote_context) { + const auto input = sourcemeta::core::parse_json( + R"({ "@context": "https://example.com/no-context" })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", remote_resolver()), + "Invalid remote context", "/@context"); +} + +TEST(JSONLD_expand_error, recursive_context_inclusion) { + const auto input = sourcemeta::core::parse_json( + R"({ "@context": "https://example.com/recursive" })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", remote_resolver()), + "Recursive context inclusion", "/@context"); +} + +TEST(JSONLD_expand_error, colliding_keywords) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "id": "@id" }, + "@id": "http://example.com/a", + "id": "http://example.com/b" + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Colliding keywords", "/id"); +} + +TEST(JSONLD_expand_error, invalid_id_value) { + const auto input = sourcemeta::core::parse_json(R"({ "@id": true })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @id value", "/@id"); +} + +TEST(JSONLD_expand_error, invalid_type_value) { + const auto input = sourcemeta::core::parse_json(R"({ "@type": true })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid type value", "/@type"); +} + +TEST(JSONLD_expand_error, invalid_value_object) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { + "@value": "x", "@type": "http://example.com/t", "@language": "en" + } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid value object", + "/http:~1~1example.com~1p"); +} + +TEST(JSONLD_expand_error, invalid_language_tagged_string) { + 
const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@value": "x", "@language": true } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid language-tagged string", + "/http:~1~1example.com~1p"); +} + +TEST(JSONLD_expand_error, invalid_language_tagged_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@value": 1, "@language": "en" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid language-tagged value", + "/http:~1~1example.com~1p"); +} + +TEST(JSONLD_expand_error, invalid_typed_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@value": "x", "@type": "_:b" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid typed value", "/http:~1~1example.com~1p"); +} + +TEST(JSONLD_expand_error, invalid_value_object_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@value": { "a": 1 } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid value object value", + "/http:~1~1example.com~1p"); +} + +TEST(JSONLD_expand_error, invalid_set_or_list_object) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@list": [ "a" ], "@id": "http://example.com/x" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid set or list object", + "/http:~1~1example.com~1p"); +} + +TEST(JSONLD_expand_error, invalid_index_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@index": true, "@value": "x" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @index value", + "/http:~1~1example.com~1p/@index"); +} + +TEST(JSONLD_expand_error, invalid_reverse_value) { + const auto input = sourcemeta::core::parse_json(R"({ "@reverse": "foo" })"); + + 
EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @reverse value", "/@reverse"); +} + +TEST(JSONLD_expand_error, invalid_reverse_property_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "@reverse": { "http://example.com/p": { "@value": "x" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid reverse property value", "/@reverse"); +} + +TEST(JSONLD_expand_error, invalid_included_value) { + const auto input = + sourcemeta::core::parse_json(R"({ "@included": { "@value": "x" } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @included value", "/@included"); +} + +TEST(JSONLD_expand_error, invalid_nest_value_expansion) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "nest": "@nest" }, + "nest": { "@value": "x" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @nest value", "/nest"); +} + +TEST(JSONLD_expand_error, list_of_lists) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@list": [ { "@list": [ "a" ] } ] } + })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + sourcemeta::core::JSONLDVersion::V1_0), + "List of lists", "/http:~1~1example.com~1p/@list"); +} diff --git a/test/jsonld/jsonld_expand_test.cc b/test/jsonld/jsonld_expand_test.cc index 7ce7b499e..e56b32ed9 100644 --- a/test/jsonld/jsonld_expand_test.cc +++ b/test/jsonld/jsonld_expand_test.cc @@ -3,11 +3,228 @@ #include #include -// TODO: Remove this placeholder and replace it with real unit tests once -// jsonld_expand is implemented -TEST(JSONLD_expand, returns_empty_array_for_now) { - const auto document{ - sourcemeta::core::parse_json(R"({ "@id": "https://example.com" })")}; - const auto result{sourcemeta::core::jsonld_expand(document)}; - EXPECT_EQ(result, sourcemeta::core::JSON::make_array()); +TEST(JSONLD_expand, empty_object) { + const auto 
input = sourcemeta::core::parse_json("{}"); + + const auto expected = sourcemeta::core::parse_json("[]"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, absolute_iri_property_with_string_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/foo": "bar" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/foo": [ { "@value": "bar" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, node_with_id_and_property) { + const auto input = sourcemeta::core::parse_json(R"({ + "@id": "http://example.com/a", + "http://example.com/foo": "bar" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "@id": "http://example.com/a", + "http://example.com/foo": [ { "@value": "bar" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, type_is_made_an_array) { + const auto input = sourcemeta::core::parse_json(R"({ + "@id": "http://example.com/a", + "@type": "http://example.com/T", + "http://example.com/foo": "bar" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "@id": "http://example.com/a", + "@type": [ "http://example.com/T" ], + "http://example.com/foo": [ { "@value": "bar" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, multiple_values_preserve_order) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/foo": [ "a", "b" ] + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/foo": [ { "@value": "a" }, { "@value": "b" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, numeric_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/foo": 1 + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + 
"http://example.com/foo": [ { "@value": 1 } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, boolean_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/foo": true + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/foo": [ { "@value": true } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, undefined_term_without_context_is_dropped) { + const auto input = sourcemeta::core::parse_json(R"({ + "foo": "bar" + })"); + + const auto expected = sourcemeta::core::parse_json("[]"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, term_maps_to_iri) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "name": "http://example.com/name" }, + "name": "John" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/name": [ { "@value": "John" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, vocabulary_mapping) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "@vocab": "http://example.com/" }, + "name": "John" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/name": [ { "@value": "John" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, compact_iri_via_prefix) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "ex": "http://example.com/" }, + "ex:name": "John" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/name": [ { "@value": "John" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, type_coercion_to_id) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "knows": { "@id": 
"http://example.com/knows", "@type": "@id" } + }, + "knows": "http://example.com/jane" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/knows": [ { "@id": "http://example.com/jane" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, type_coercion_to_datatype) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "born": { + "@id": "http://example.com/born", + "@type": "http://www.w3.org/2001/XMLSchema#date" + } + }, + "born": "1990-01-01" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/born": [ + { + "@value": "1990-01-01", + "@type": "http://www.w3.org/2001/XMLSchema#date" + } + ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, default_language) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "@language": "en", "name": "http://example.com/name" }, + "name": "John" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/name": [ { "@value": "John", "@language": "en" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, list_keyword) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/foo": { "@list": [ "a", "b" ] } + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/foo": [ + { "@list": [ { "@value": "a" }, { "@value": "b" } ] } + ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); } diff --git a/test/jsonld/jsonld_suite.cc b/test/jsonld/jsonld_suite.cc index 8047c9ea6..055a3ec12 100644 --- a/test/jsonld/jsonld_suite.cc +++ b/test/jsonld/jsonld_suite.cc @@ -30,13 +30,6 @@ class JSONLDExpandTest : public testing::Test { : test_case_{std::move(test_case)} {} auto TestBody() -> void override { - // TODO: Remove this guard once jsonld_expand 
is implemented. The harness - // below already drives the full upstream expansion suite, so dropping the - // skip will run every case for real - if (this->pending_) { - GTEST_SKIP() << "jsonld_expand is not yet implemented"; - } - const auto &test_case{this->test_case_}; const sourcemeta::core::JSONLDResolver resolver = [&test_case](const sourcemeta::core::JSON::StringView identifier) @@ -67,7 +60,15 @@ class JSONLDExpandTest : public testing::Test { } FAIL() << "Expected error code: " << test_case.error_code; } catch (const sourcemeta::core::JSONLDError &error) { - EXPECT_EQ(test_case.error_code, error.code()); + // The implementation capitalises the first letter of every error code, + // whereas the upstream suite expresses them in lower case. + std::string actual_code{error.what()}; + if (!actual_code.empty()) { + actual_code.front() = + sourcemeta::core::to_lowercase(actual_code.front()); + } + EXPECT_EQ(static_cast(test_case.error_code), + actual_code); } } else { const auto expected{sourcemeta::core::read_json(test_case.expect)}; @@ -88,7 +89,6 @@ class JSONLDExpandTest : public testing::Test { private: JSONLDExpandCase test_case_; - bool pending_{true}; }; auto sanitize(const std::string_view identifier) -> std::string { From 10599cdde2b1debc1be261862b1b8cb6618075c9 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Fri, 26 Jun 2026 14:32:15 -0300 Subject: [PATCH 03/13] Fix Signed-off-by: Juan Cruz Viotti --- src/core/jsonld/jsonld_expansion.cc | 53 +++++++++++++++-------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/src/core/jsonld/jsonld_expansion.cc b/src/core/jsonld/jsonld_expansion.cc index 0df961c25..995fa0b89 100644 --- a/src/core/jsonld/jsonld_expansion.cc +++ b/src/core/jsonld/jsonld_expansion.cc @@ -323,7 +323,8 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, if (result.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { auto merged{ into_array(std::move(result.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)))}; 
- for (auto &item : into_array(std::move(expanded_type)).as_array()) { + auto expanded_type_array{into_array(std::move(expanded_type))}; + for (auto &item : expanded_type_array.as_array()) { merged.push_back(item); } result.assign(KEYWORD_TYPE, std::move(merged)); @@ -436,8 +437,8 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, } else if (is_keyword(reverse_property, reverse_entry.hash)) { throw JSONLDError("Invalid reverse property map", entry_pointer); } else { - for (const auto &item : - into_array(JSON{reverse_entry.second}).as_array()) { + const auto reverse_values{into_array(JSON{reverse_entry.second})}; + for (const auto &item : reverse_values.as_array()) { if (item.is_object() && (item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH))) { @@ -521,10 +522,10 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, empty_weak_pointer)}; const bool none_key{expanded_key.has_value() && expanded_key.value() == KEYWORD_NONE}; - for (auto &item : - into_array(expand(state, value_context, property, *graph_value, - entry_pointer.concat(index))) - .as_array()) { + auto graph_items{ + into_array(expand(state, value_context, property, *graph_value, + entry_pointer.concat(index)))}; + for (auto &item : graph_items.as_array()) { // Wrap the item in a graph object, unless it is already one. 
JSON graph{nullptr}; if (item.is_object() && @@ -573,7 +574,8 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, const bool is_none{language == KEYWORD_NONE || (expanded_language.has_value() && expanded_language.value() == KEYWORD_NONE)}; - for (auto &item : into_array(JSON{*language_value}).as_array()) { + auto language_items{into_array(JSON{*language_value})}; + for (auto &item : language_items.as_array()) { if (item.is_null()) { continue; } @@ -613,10 +615,10 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, for (const auto &[index_key, index_value] : sorted_entries(entry.second)) { const JSON::String &index{*index_key}; - for (auto &item : - into_array(expand(state, value_context, property, *index_value, - entry_pointer.concat(index))) - .as_array()) { + auto index_items{ + into_array(expand(state, value_context, property, *index_value, + entry_pointer.concat(index)))}; + for (auto &item : index_items.as_array()) { if (index != KEYWORD_NONE) { if (property_valued) { if (item.is_object() && @@ -682,7 +684,8 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, } // String values in a type map are node references. 
auto entries{JSON::make_array()}; - for (auto &raw : into_array(JSON{*map_value}).as_array()) { + auto raw_values{into_array(JSON{*map_value})}; + for (auto &raw : raw_values.as_array()) { if (raw.is_string() && !by_id) { auto reference{JSON::make_object()}; const bool reference_vocab{definition->type_mapping.has_value() && @@ -697,10 +700,10 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, KEYWORD_ID_HASH); entries.push_back(std::move(reference)); } else { - for (auto &expanded : - into_array(expand(state, entry_context, property, raw, - entry_pointer.concat(index))) - .as_array()) { + auto expanded_items{ + into_array(expand(state, entry_context, property, raw, + entry_pointer.concat(index)))}; + for (auto &expanded : expanded_items.as_array()) { entries.push_back(expanded); } } @@ -717,10 +720,9 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, auto types{JSON::make_array()}; types.push_back(JSON{expanded_index.value()}); if (item.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { - for (auto &existing : - into_array( - std::move(item.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH))) - .as_array()) { + auto existing_types{into_array( + std::move(item.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)))}; + for (auto &existing : existing_types.as_array()) { types.push_back(existing); } } @@ -732,9 +734,9 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, } } else if (container_includes(definition, KEYWORD_GRAPH)) { expanded_value = JSON::make_array(); - for (auto &item : into_array(expand(state, value_context, property, - entry.second, entry_pointer)) - .as_array()) { + auto graph_items{into_array( + expand(state, value_context, property, entry.second, entry_pointer))}; + for (auto &item : graph_items.as_array()) { auto graph{JSON::make_object()}; graph.assign_assume_new(JSON::String{KEYWORD_GRAPH}, into_array(std::move(item)), @@ -777,7 +779,8 @@ auto expand_entries(ExpansionState &state, ActiveContext 
&active_context, } if (definition != nullptr && definition->reverse) { - for (const auto &item : into_array(JSON{expanded_value}).as_array()) { + const auto reverse_items{into_array(JSON{expanded_value})}; + for (const auto &item : reverse_items.as_array()) { if (item.is_object() && (item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH))) { From 4a2baefb50d06cf0edb67c9e1f43a73980349c38 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Fri, 26 Jun 2026 15:14:33 -0300 Subject: [PATCH 04/13] More Signed-off-by: Juan Cruz Viotti --- src/core/jsonld/jsonld_expansion.cc | 40 ++++++++++++++------ test/jsonld/jsonld_expand_error_test.cc | 10 +++++ test/jsonld/jsonld_expand_test.cc | 49 +++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 12 deletions(-) diff --git a/src/core/jsonld/jsonld_expansion.cc b/src/core/jsonld/jsonld_expansion.cc index 995fa0b89..549f8f5c9 100644 --- a/src/core/jsonld/jsonld_expansion.cc +++ b/src/core/jsonld/jsonld_expansion.cc @@ -348,6 +348,13 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, } if (name == KEYWORD_DIRECTION) { + if (state.processing_1_0) { + continue; + } + if (!entry.second.is_string() || (entry.second.to_string() != "ltr" && + entry.second.to_string() != "rtl")) { + throw JSONLDError("Invalid base direction", entry_pointer); + } result.assign_assume_new(JSON::String{KEYWORD_DIRECTION}, JSON{entry.second}, KEYWORD_DIRECTION_HASH); continue; @@ -393,6 +400,9 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, } if (name == KEYWORD_INCLUDED) { + if (state.processing_1_0) { + continue; + } auto included{into_array(expand(state, active_context, std::nullopt, entry.second, entry_pointer))}; for (const auto &item : included.as_array()) { @@ -456,7 +466,9 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, } if (is_keyword(name)) { - // TODO: @direction, @json, @nest + // Keywords with no property-level 
handler (such as @vocab or @none + // reached through an alias) carry no expanded value, so the Expansion + // algorithm adds nothing to the result for them. continue; } @@ -666,12 +678,15 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, // Type-scoped contexts do not propagate, so the values are resolved // against the context that preceded the containing type-scoped // context, with only this key's context layered on top. - ActiveContext entry_context{value_context.previous && !by_id - ? *value_context.previous - : value_context}; + const ActiveContext &base_context{value_context.previous && !by_id + ? *value_context.previous + : value_context}; + ActiveContext entry_context{base_context}; if (!by_id) { - const auto type_definition{entry_context.terms.find(index)}; - if (type_definition != entry_context.terms.cend() && + // Resolve the type term against the context the copy was made from, + // which outlives the copy being mutated below. + const auto type_definition{base_context.terms.find(index)}; + if (type_definition != base_context.terms.cend() && type_definition->second.context.has_value()) { const auto saved_base{state.context_base_override}; state.context_base_override = type_definition->second.context_base; @@ -805,18 +820,19 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, const auto definition{active_context.terms.find(*nest_property)}; if (definition != active_context.terms.cend() && definition->second.context.has_value()) { - const ActiveContext saved{active_context}; + // Process the scoped context into a copy so the term that owns it is not + // freed while it is being read. 
+ ActiveContext nested{active_context}; const auto saved_base{state.context_base_override}; state.context_base_override = definition->second.context_base; state.protected_override = true; - process_context(state, active_context, definition->second.context.value(), + process_context(state, nested, definition->second.context.value(), nest_pointer); state.protected_override = false; state.context_base_override = saved_base; - active_context.previous = nullptr; - expand_entries(state, active_context, type_context, result, - active_property, *nest, nest_pointer); - active_context = saved; + nested.previous = nullptr; + expand_entries(state, nested, type_context, result, active_property, + *nest, nest_pointer); } else { expand_entries(state, active_context, type_context, result, active_property, *nest, nest_pointer); diff --git a/test/jsonld/jsonld_expand_error_test.cc b/test/jsonld/jsonld_expand_error_test.cc index b30067049..23518920e 100644 --- a/test/jsonld/jsonld_expand_error_test.cc +++ b/test/jsonld/jsonld_expand_error_test.cc @@ -428,3 +428,13 @@ TEST(JSONLD_expand_error, list_of_lists) { sourcemeta::core::JSONLDVersion::V1_0), "List of lists", "/http:~1~1example.com~1p/@list"); } + +TEST(JSONLD_expand_error, invalid_base_direction_in_value_object) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "p": "http://example.com/p" }, + "p": { "@value": "v", "@direction": "up" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid base direction", "/p/@direction"); +} diff --git a/test/jsonld/jsonld_expand_test.cc b/test/jsonld/jsonld_expand_test.cc index e56b32ed9..e1e899ed4 100644 --- a/test/jsonld/jsonld_expand_test.cc +++ b/test/jsonld/jsonld_expand_test.cc @@ -228,3 +228,52 @@ TEST(JSONLD_expand, list_keyword) { EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); } + +TEST(JSONLD_expand, direction_dropped_in_json_ld_1_0) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "p": 
"http://example.com/p" }, + "p": { "@value": "v", "@direction": "rtl" } + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { "http://example.com/p": [ { "@value": "v" } ] } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand( + input, "", {}, sourcemeta::core::JSONLDVersion::V1_0), + expected); +} + +TEST(JSONLD_expand, included_dropped_in_json_ld_1_0) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "p": "http://example.com/p" }, + "p": "v", + "@included": { "@id": "http://example.com/other" } + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { "http://example.com/p": [ { "@value": "v" } ] } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand( + input, "", {}, sourcemeta::core::JSONLDVersion::V1_0), + expected); +} + +TEST(JSONLD_expand, nest_term_whose_scoped_context_redefines_itself) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "nest": { + "@id": "@nest", + "@context": { "nest": { "@id": "http://example.com/nest" } } + } + }, + "nest": { "http://example.com/foo": "bar" } + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { "http://example.com/foo": [ { "@value": "bar" } ] } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} From 64ce168083d72e99576946404743ae4bbb2b06b5 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Fri, 26 Jun 2026 15:32:04 -0300 Subject: [PATCH 05/13] More Signed-off-by: Juan Cruz Viotti --- src/core/jsonld/jsonld_expansion.cc | 26 +++++++++++------- test/jsonld/jsonld_expand_error_test.cc | 35 ++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/src/core/jsonld/jsonld_expansion.cc b/src/core/jsonld/jsonld_expansion.cc index 549f8f5c9..4339e5003 100644 --- a/src/core/jsonld/jsonld_expansion.cc +++ b/src/core/jsonld/jsonld_expansion.cc @@ -156,14 +156,9 @@ auto expand_object(ExpansionState &state, ActiveContext active_context, if (content.is_null() && !is_json) { 
return JSON{nullptr}; } - if (const auto *language{ - result.try_at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH)}) { - if (!language->is_string()) { - throw JSONLDError("Invalid language-tagged string", pointer); - } - if (!content.is_string()) { - throw JSONLDError("Invalid language-tagged value", pointer); - } + if (result.defines(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) && + !content.is_string()) { + throw JSONLDError("Invalid language-tagged value", pointer); } if (has_type && (type_string == nullptr || type_string->starts_with("_:") || type_string->find(' ') != JSON::String::npos)) { @@ -276,8 +271,16 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, continue; } - if (is_keyword(name) && name != KEYWORD_TYPE && name != KEYWORD_INCLUDED && - result.defines(name)) { + if (is_keyword(name) && active_property.has_value() && + active_property.value() == KEYWORD_REVERSE) { + throw JSONLDError("Invalid reverse property map", entry_pointer); + } + + // The @type and @included exemption from colliding keywords does not apply + // in json-ld-1.0. 
+ if (is_keyword(name) && result.defines(name) && + (state.processing_1_0 || + (name != KEYWORD_TYPE && name != KEYWORD_INCLUDED))) { throw JSONLDError("Colliding keywords", entry_pointer); } @@ -342,6 +345,9 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, } if (name == KEYWORD_LANGUAGE) { + if (!entry.second.is_string()) { + throw JSONLDError("Invalid language-tagged string", entry_pointer); + } result.assign_assume_new(JSON::String{KEYWORD_LANGUAGE}, JSON{entry.second}, KEYWORD_LANGUAGE_HASH); continue; diff --git a/test/jsonld/jsonld_expand_error_test.cc b/test/jsonld/jsonld_expand_error_test.cc index 23518920e..4cc698d9c 100644 --- a/test/jsonld/jsonld_expand_error_test.cc +++ b/test/jsonld/jsonld_expand_error_test.cc @@ -332,7 +332,7 @@ TEST(JSONLD_expand_error, invalid_language_tagged_string) { EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), "Invalid language-tagged string", - "/http:~1~1example.com~1p"); + "/http:~1~1example.com~1p/@language"); } TEST(JSONLD_expand_error, invalid_language_tagged_value) { @@ -438,3 +438,36 @@ TEST(JSONLD_expand_error, invalid_base_direction_in_value_object) { EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), "Invalid base direction", "/p/@direction"); } + +TEST(JSONLD_expand_error, keyword_alias_dropped_inside_reverse_map) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "none": "@none" }, + "@reverse": { "none": "x" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid reverse property map", "/@reverse/none"); +} + +TEST(JSONLD_expand_error, colliding_type_in_json_ld_1_0) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "type": "@type" }, + "@type": "http://example.com/A", + "type": "http://example.com/B" + })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + sourcemeta::core::JSONLDVersion::V1_0), + "Colliding keywords", "/type"); +} + 
+TEST(JSONLD_expand_error, invalid_language_tagged_string_without_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "p": "http://example.com/p" }, + "p": { "@language": 42 } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid language-tagged string", "/p/@language"); +} From b1cd3e316df90b72dc676fb5fedb30adef8aacc7 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Fri, 26 Jun 2026 16:12:09 -0300 Subject: [PATCH 06/13] More Signed-off-by: Juan Cruz Viotti --- src/core/jsonld/jsonld_context_processing.cc | 64 +++++++---- .../jsonld/jsonld_create_term_definition.cc | 107 ++++++++++++++---- src/core/jsonld/jsonld_expansion.cc | 2 +- src/core/jsonld/jsonld_iri_expansion.cc | 5 +- test/jsonld/jsonld_expand_error_test.cc | 107 ++++++++++++++++++ 5 files changed, 238 insertions(+), 47 deletions(-) diff --git a/src/core/jsonld/jsonld_context_processing.cc b/src/core/jsonld/jsonld_context_processing.cc index c557f5288..9d7ce91e6 100644 --- a/src/core/jsonld/jsonld_context_processing.cc +++ b/src/core/jsonld/jsonld_context_processing.cc @@ -25,6 +25,23 @@ auto process_context(ExpansionState &state, ActiveContext &active_context, contexts.emplace_back(&local_context, pointer); } + // The @propagate flag is read once from a top-level map context, before the + // contexts are processed (JSON-LD 1.1 API Section 5.1 steps 2 and 3). A + // non-boolean value is reported per entry by the loop below. 
+ bool effective_propagate{propagate}; + if (local_context.is_object()) { + if (const auto *propagate_entry{ + local_context.try_at(KEYWORD_PROPAGATE, KEYWORD_PROPAGATE_HASH)}; + propagate_entry != nullptr && propagate_entry->is_boolean()) { + effective_propagate = propagate_entry->to_boolean(); + } + } + if (!effective_propagate && !active_context.previous) { + auto snapshot{std::make_shared(active_context)}; + snapshot->previous = nullptr; + active_context.previous = snapshot; + } + for (const auto &[entry_pointer, location] : contexts) { const auto &context{*entry_pointer}; if (context.is_null()) { @@ -71,8 +88,8 @@ auto process_context(ExpansionState &state, ActiveContext &active_context, } state.remote_context_chain.push_back(reference); try { - process_context(state, active_context, *context_entry, location, - propagate); + // A loaded remote context is processed with the default propagation. + process_context(state, active_context, *context_entry, location); } catch (...) { state.remote_context_chain.pop_back(); throw; @@ -85,17 +102,17 @@ auto process_context(ExpansionState &state, ActiveContext &active_context, throw JSONLDError("Invalid local context", location); } - if (state.processing_1_0 && - context.defines(KEYWORD_VERSION, KEYWORD_VERSION_HASH)) { - throw JSONLDError("Processing mode conflict", location, - {KEYWORD_VERSION}); - } if (const auto *version{ context.try_at(KEYWORD_VERSION, KEYWORD_VERSION_HASH)}; version != nullptr && (!version->is_real() || version->to_real() != 1.1)) { throw JSONLDError("Invalid @version value", location, {KEYWORD_VERSION}); } + if (state.processing_1_0 && + context.defines(KEYWORD_VERSION, KEYWORD_VERSION_HASH)) { + throw JSONLDError("Processing mode conflict", location, + {KEYWORD_VERSION}); + } if (state.processing_1_0 && (context.defines(KEYWORD_PROPAGATE, KEYWORD_PROPAGATE_HASH) || context.defines(KEYWORD_IMPORT, KEYWORD_IMPORT_HASH) || @@ -103,19 +120,11 @@ auto process_context(ExpansionState &state, 
ActiveContext &active_context, throw JSONLDError("Invalid context entry", location); } - bool effective_propagate{propagate}; if (const auto *propagate_entry{ - context.try_at(KEYWORD_PROPAGATE, KEYWORD_PROPAGATE_HASH)}) { - if (!propagate_entry->is_boolean()) { - throw JSONLDError("Invalid @propagate value", location, - {KEYWORD_PROPAGATE}); - } - effective_propagate = propagate_entry->to_boolean(); - } - if (!effective_propagate && !active_context.previous) { - auto snapshot{std::make_shared(active_context)}; - snapshot->previous = nullptr; - active_context.previous = snapshot; + context.try_at(KEYWORD_PROPAGATE, KEYWORD_PROPAGATE_HASH)}; + propagate_entry != nullptr && !propagate_entry->is_boolean()) { + throw JSONLDError("Invalid @propagate value", location, + {KEYWORD_PROPAGATE}); } // @protected applies to imported terms too, so it is set before @import. @@ -177,13 +186,18 @@ auto process_context(ExpansionState &state, ActiveContext &active_context, active_context.base = std::nullopt; } else if (!base.is_string()) { throw JSONLDError("Invalid base IRI", location, {KEYWORD_BASE}); - } else if (active_context.base.has_value()) { - active_context.base = - URI::from_iri(base.to_string()) - .resolve_from(URI::from_iri(active_context.base.value())) - .recompose(); } else { - active_context.base = base.to_string(); + const auto &base_string{base.to_string()}; + if (active_context.base.has_value()) { + active_context.base = + URI::from_iri(base_string) + .resolve_from(URI::from_iri(active_context.base.value())) + .recompose(); + } else if (URI::from_iri(base_string).is_absolute()) { + active_context.base = base_string; + } else { + throw JSONLDError("Invalid base IRI", location, {KEYWORD_BASE}); + } } } diff --git a/src/core/jsonld/jsonld_create_term_definition.cc b/src/core/jsonld/jsonld_create_term_definition.cc index 422ab726e..ed1aac1f0 100644 --- a/src/core/jsonld/jsonld_create_term_definition.cc +++ b/src/core/jsonld/jsonld_create_term_definition.cc @@ -92,7 
+92,7 @@ auto create_term_definition(ExpansionState &state, type_definition.is_protected = entry.second.to_boolean(); } else if (name == KEYWORD_CONTAINER && entry.second.is_string()) { const auto &container{entry.second.to_string()}; - if (container == KEYWORD_SET || container == KEYWORD_ID) { + if (container == KEYWORD_SET) { type_definition.container.push_back(container); has_container = true; } else { @@ -141,6 +141,22 @@ auto create_term_definition(ExpansionState &state, if (value.is_null() || (id_entry != nullptr && id_entry->is_null())) { TermDefinition empty; empty.is_protected = state.context_protected; + // @protected is processed before the null @id is handled, so an explicitly + // protected term that maps to null stays protected. + if (id_entry != nullptr) { + if (const auto *protected_entry{ + value.try_at(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH)}) { + if (!protected_entry->is_boolean()) { + throw JSONLDError("Invalid @protected value", term_pointer, + {KEYWORD_PROTECTED}); + } + if (state.processing_1_0) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_PROTECTED}); + } + empty.is_protected = protected_entry->to_boolean(); + } + } finalize_definition(state, active_context, defined, term, term_pointer, previous, std::move(empty)); return; @@ -341,10 +357,10 @@ auto create_term_definition(ExpansionState &state, } } else if (container.is_string()) { const auto &container_string{container.to_string()}; - // In 1.0, only @list, @set and @index are permitted. - if (state.processing_1_0 && container_string != KEYWORD_LIST && - container_string != KEYWORD_SET && - container_string != KEYWORD_INDEX) { + // In 1.0, the @graph, @id and @type containers are not permitted. 
+ if (state.processing_1_0 && (container_string == KEYWORD_GRAPH || + container_string == KEYWORD_ID || + container_string == KEYWORD_TYPE)) { throw JSONLDError("Invalid container mapping", term_pointer, {KEYWORD_CONTAINER}); } @@ -365,21 +381,46 @@ auto create_term_definition(ExpansionState &state, } } } - bool has_list{false}; - bool has_type{false}; + bool container_graph{false}; + bool container_id{false}; + bool container_index{false}; + bool container_language{false}; + bool container_list{false}; + bool container_set{false}; + bool container_type{false}; for (const auto &item : definition.container) { - if (item == KEYWORD_LIST) { - has_list = true; + if (item == KEYWORD_GRAPH) { + container_graph = true; + } else if (item == KEYWORD_ID) { + container_id = true; + } else if (item == KEYWORD_INDEX) { + container_index = true; + } else if (item == KEYWORD_LANGUAGE) { + container_language = true; + } else if (item == KEYWORD_LIST) { + container_list = true; + } else if (item == KEYWORD_SET) { + container_set = true; } else if (item == KEYWORD_TYPE) { - has_type = true; + container_type = true; } } - if (has_list && definition.container.size() > 1) { - throw JSONLDError("Invalid container mapping", term_pointer, - {KEYWORD_CONTAINER}); + // Valid array combinations (JSON-LD 1.1 API Section 5.1.1 step 19.1): a + // single keyword, or @graph with exactly one of @id or @index optionally + // with @set, or @set combined with any of @index, @graph, @id, @type, or + // @language. + if (definition.container.size() != 1) { + const bool graph_form{ + container_graph && (container_id != container_index) && + !container_list && !container_type && !container_language}; + const bool set_form{container_set && !container_list}; + if (!graph_form && !set_form) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } } // A type-map container may only coerce its keys to identifiers. 
- if (has_type && definition.type_mapping.has_value() && + if (container_type && definition.type_mapping.has_value() && definition.type_mapping.value() != KEYWORD_ID && definition.type_mapping.value() != KEYWORD_VOCAB) { throw JSONLDError("Invalid type mapping", term_pointer, {KEYWORD_TYPE}); @@ -406,6 +447,12 @@ auto create_term_definition(ExpansionState &state, direction_entry != nullptr && !value.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { const auto &direction{*direction_entry}; + if (!direction.is_null() && + (!direction.is_string() || (direction.to_string() != "ltr" && + direction.to_string() != "rtl"))) { + throw JSONLDError("Invalid base direction", term_pointer, + {KEYWORD_DIRECTION}); + } definition.has_direction = true; if (direction.is_string()) { definition.direction = direction.to_string(); @@ -445,16 +492,16 @@ auto create_term_definition(ExpansionState &state, if (const auto *prefix_entry{ value.try_at(KEYWORD_PREFIX, KEYWORD_PREFIX_HASH)}) { + if (state.processing_1_0 || term.find(':') != JSON::String::npos || + term.find('/') != JSON::String::npos) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_PREFIX}); + } if (!prefix_entry->is_boolean()) { throw JSONLDError("Invalid @prefix value", term_pointer, {KEYWORD_PREFIX}); } definition.prefix = prefix_entry->to_boolean(); - if (definition.prefix && (term.find(':') != JSON::String::npos || - term.find('/') != JSON::String::npos)) { - throw JSONLDError("Invalid term definition", term_pointer, - {KEYWORD_PREFIX}); - } if (definition.prefix && definition.iri.has_value() && is_keyword(definition.iri.value())) { throw JSONLDError("Invalid term definition", term_pointer, @@ -463,6 +510,10 @@ auto create_term_definition(ExpansionState &state, } if (const auto *nest_entry{value.try_at(KEYWORD_NEST, KEYWORD_NEST_HASH)}) { + if (state.processing_1_0) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_NEST}); + } const auto &nest{*nest_entry}; if (!nest.is_string()) 
{ throw JSONLDError("Invalid @nest value", term_pointer, {KEYWORD_NEST}); @@ -507,8 +558,26 @@ auto create_term_definition(ExpansionState &state, throw JSONLDError("Invalid @protected value", term_pointer, {KEYWORD_PROTECTED}); } + if (state.processing_1_0) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_PROTECTED}); + } definition.is_protected = protected_entry->to_boolean(); } + + // A term definition may not contain any entry other than the keywords + // recognised above. + for (const auto &entry : value.as_object()) { + const JSON::StringView key{entry.first}; + if (key != KEYWORD_ID && key != KEYWORD_REVERSE && + key != KEYWORD_CONTAINER && key != KEYWORD_CONTEXT && + key != KEYWORD_DIRECTION && key != KEYWORD_INDEX && + key != KEYWORD_LANGUAGE && key != KEYWORD_NEST && + key != KEYWORD_PREFIX && key != KEYWORD_PROTECTED && + key != KEYWORD_TYPE) { + throw JSONLDError("Invalid term definition", term_pointer); + } + } } else { throw JSONLDError("Invalid term definition", term_pointer); } diff --git a/src/core/jsonld/jsonld_expansion.cc b/src/core/jsonld/jsonld_expansion.cc index 4339e5003..cb6efe317 100644 --- a/src/core/jsonld/jsonld_expansion.cc +++ b/src/core/jsonld/jsonld_expansion.cc @@ -768,7 +768,7 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, if (scoped && value_context.previous && entry.second.is_object() && !entry.second.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { // A non-propagating property-scoped context applies to the immediate - // node; nested nodes revert to the previous context. + // node, while nested nodes revert to the previous context. 
expanded_value = expand_object(state, value_context, property, entry.second, entry_pointer); } else { diff --git a/src/core/jsonld/jsonld_iri_expansion.cc b/src/core/jsonld/jsonld_iri_expansion.cc index 04d99edbb..ff6364cd7 100644 --- a/src/core/jsonld/jsonld_iri_expansion.cc +++ b/src/core/jsonld/jsonld_iri_expansion.cc @@ -40,8 +40,9 @@ auto expand_iri(ExpansionState &state, ActiveContext &active_context, return term->second.iri; } - const auto colon{value.find(':', 1)}; - if (colon != JSON::String::npos) { + if (value.find(':', 1) != JSON::String::npos) { + // The term has the form of a compact IRI, so split at the first colon. + const auto colon{value.find(':')}; const auto prefix{value.substr(0, colon)}; const auto suffix{value.substr(colon + 1)}; if (prefix == "_" || suffix.starts_with("//")) { diff --git a/test/jsonld/jsonld_expand_error_test.cc b/test/jsonld/jsonld_expand_error_test.cc index 4cc698d9c..2824190ff 100644 --- a/test/jsonld/jsonld_expand_error_test.cc +++ b/test/jsonld/jsonld_expand_error_test.cc @@ -471,3 +471,110 @@ TEST(JSONLD_expand_error, invalid_language_tagged_string_without_value) { EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), "Invalid language-tagged string", "/p/@language"); } + +TEST(JSONLD_expand_error, protected_in_term_definition_in_1_0) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@protected": true } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + sourcemeta::core::JSONLDVersion::V1_0), + "Invalid term definition", "/@context/a/@protected"); +} + +TEST(JSONLD_expand_error, nest_in_term_definition_in_1_0) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@nest": "@nest" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + sourcemeta::core::JSONLDVersion::V1_0), + "Invalid term definition", 
"/@context/a/@nest"); +} + +TEST(JSONLD_expand_error, prefix_on_compact_iri_term) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "ex": "http://example.com/", + "ex:foo": { "@id": "http://example.com/foo", "@prefix": true } + } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid term definition", + "/@context/ex:foo/@prefix"); +} + +TEST(JSONLD_expand_error, invalid_base_direction_in_term_definition) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@direction": "up" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid base direction", + "/@context/a/@direction"); +} + +TEST(JSONLD_expand_error, invalid_container_array_combination) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "a": { "@id": "http://example.com/a", "@container": [ "@id", "@language" ] } + } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid container mapping", + "/@context/a/@container"); +} + +TEST(JSONLD_expand_error, unknown_entry_in_term_definition) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@bogus": true } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid term definition", "/@context/a"); +} + +TEST(JSONLD_expand_error, type_keyword_container_id) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "@type": { "@container": "@id" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Keyword redefinition", "/@context/@type"); +} + +TEST(JSONLD_expand_error, invalid_version_precedes_mode_conflict) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "@version": 1.5 } + })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + 
sourcemeta::core::JSONLDVersion::V1_0), + "Invalid @version value", "/@context/@version"); +} + +TEST(JSONLD_expand_error, relative_base_without_base) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": [ { "@base": null }, { "@base": "relative/path" } ] + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid base IRI", "/@context/1/@base"); +} + +TEST(JSONLD_expand_error, protected_null_term_redefinition) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": [ + { "term": { "@id": null, "@protected": true } }, + { "term": "http://example.com/x" } + ] + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Protected term redefinition", "/@context/1/term"); +} From c2f57713a29d90fa582ad72fcaf61d2e34b85a77 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Fri, 26 Jun 2026 18:57:53 -0300 Subject: [PATCH 07/13] More Signed-off-by: Juan Cruz Viotti --- src/core/jsonld/jsonld_expansion.cc | 84 +++++++++++++------------ test/jsonld/jsonld_expand_error_test.cc | 9 +++ test/jsonld/jsonld_expand_test.cc | 23 +++++++ 3 files changed, 75 insertions(+), 41 deletions(-) diff --git a/src/core/jsonld/jsonld_expansion.cc b/src/core/jsonld/jsonld_expansion.cc index cb6efe317..e0c113ce1 100644 --- a/src/core/jsonld/jsonld_expansion.cc +++ b/src/core/jsonld/jsonld_expansion.cc @@ -175,6 +175,24 @@ auto expand_object(ExpansionState &state, ActiveContext active_context, result.assign(KEYWORD_TYPE, into_array(JSON{*type_entry})); } + // A set or list object may only carry an @index entry besides, and this is + // validated before any value is dropped. 
+ if (result.defines(KEYWORD_LIST, KEYWORD_LIST_HASH) || + result.defines(KEYWORD_SET, KEYWORD_SET_HASH)) { + for (const auto &entry : result.as_object()) { + const auto &name{entry.first}; + if (name != KEYWORD_LIST && name != KEYWORD_SET && + name != KEYWORD_INDEX) { + throw JSONLDError("Invalid set or list object", pointer); + } + } + } + + // A bare @set collapses to its array. + if (const auto *set{result.try_at(KEYWORD_SET, KEYWORD_SET_HASH)}) { + return *set; + } + // Drop an incomplete value object that has a language or direction but no // value. if (!result.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) && @@ -204,22 +222,6 @@ auto expand_object(ExpansionState &state, ActiveContext active_context, } } - if (result.defines(KEYWORD_LIST, KEYWORD_LIST_HASH) || - result.defines(KEYWORD_SET, KEYWORD_SET_HASH)) { - for (const auto &entry : result.as_object()) { - const auto &name{entry.first}; - if (name != KEYWORD_LIST && name != KEYWORD_SET && - name != KEYWORD_INDEX) { - throw JSONLDError("Invalid set or list object", pointer); - } - } - } - - // A bare @set collapses to its array. - if (const auto *set{result.try_at(KEYWORD_SET, KEYWORD_SET_HASH)}) { - return *set; - } - return result; } @@ -503,23 +505,20 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, } ActiveContext &value_context{scoped ? scoped_context : active_context}; - // A term coerced to @json keeps its value verbatim. + JSON expanded_value{nullptr}; if (definition != nullptr && definition->type_mapping.has_value() && definition->type_mapping.value() == KEYWORD_JSON) { + // A term coerced to @json keeps its value verbatim. 
auto json_value{JSON::make_object()}; json_value.assign_assume_new(JSON::String{KEYWORD_VALUE}, JSON{entry.second}, KEYWORD_VALUE_HASH); json_value.assign_assume_new(JSON::String{KEYWORD_TYPE}, JSON{KEYWORD_JSON}, KEYWORD_TYPE_HASH); - merge(result, name, into_array(std::move(json_value))); - continue; - } - - JSON expanded_value{nullptr}; - if (entry.second.is_object() && - container_includes(definition, KEYWORD_GRAPH) && - (container_includes(definition, KEYWORD_ID) || - container_includes(definition, KEYWORD_INDEX))) { + expanded_value = std::move(json_value); + } else if (entry.second.is_object() && + container_includes(definition, KEYWORD_GRAPH) && + (container_includes(definition, KEYWORD_ID) || + container_includes(definition, KEYWORD_INDEX))) { const bool by_id{container_includes(definition, KEYWORD_ID)}; const bool property_valued{definition->index.has_value() && definition->index.value() != KEYWORD_INDEX}; @@ -775,24 +774,27 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, expanded_value = expand(state, value_context, property, entry.second, entry_pointer); } - if (container_includes(definition, KEYWORD_LIST) && - !expanded_value.is_null() && - !(expanded_value.is_object() && - expanded_value.defines(KEYWORD_LIST, KEYWORD_LIST_HASH))) { - expanded_value = into_array(std::move(expanded_value)); - if (state.processing_1_0) { - for (const auto &item : expanded_value.as_array()) { - if (item.is_object() && - item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { - throw JSONLDError("List of lists", entry_pointer); - } + } + + // A @list container wraps the expanded value, including a @json-coerced + // one. 
+ if (container_includes(definition, KEYWORD_LIST) && + !expanded_value.is_null() && + !(expanded_value.is_object() && + expanded_value.defines(KEYWORD_LIST, KEYWORD_LIST_HASH))) { + expanded_value = into_array(std::move(expanded_value)); + if (state.processing_1_0) { + for (const auto &item : expanded_value.as_array()) { + if (item.is_object() && + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + throw JSONLDError("List of lists", entry_pointer); } } - auto wrapper{JSON::make_object()}; - wrapper.assign_assume_new(JSON::String{KEYWORD_LIST}, - std::move(expanded_value), KEYWORD_LIST_HASH); - expanded_value = std::move(wrapper); } + auto wrapper{JSON::make_object()}; + wrapper.assign_assume_new(JSON::String{KEYWORD_LIST}, + std::move(expanded_value), KEYWORD_LIST_HASH); + expanded_value = std::move(wrapper); } if (expanded_value.is_null()) { diff --git a/test/jsonld/jsonld_expand_error_test.cc b/test/jsonld/jsonld_expand_error_test.cc index 2824190ff..644de5aca 100644 --- a/test/jsonld/jsonld_expand_error_test.cc +++ b/test/jsonld/jsonld_expand_error_test.cc @@ -578,3 +578,12 @@ TEST(JSONLD_expand_error, protected_null_term_redefinition) { EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), "Protected term redefinition", "/@context/1/term"); } + +TEST(JSONLD_expand_error, free_floating_invalid_set_or_list_object) { + const auto input = sourcemeta::core::parse_json(R"({ + "@list": [ "foo" ], "@id": "http://example.com/bar" + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid set or list object", ""); +} diff --git a/test/jsonld/jsonld_expand_test.cc b/test/jsonld/jsonld_expand_test.cc index e1e899ed4..66ad2d53e 100644 --- a/test/jsonld/jsonld_expand_test.cc +++ b/test/jsonld/jsonld_expand_test.cc @@ -277,3 +277,26 @@ TEST(JSONLD_expand, nest_term_whose_scoped_context_redefines_itself) { EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); } + +TEST(JSONLD_expand, json_typed_value_in_list_container) { + 
const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "e": { + "@id": "http://example.com/e", + "@type": "@json", + "@container": "@list" + } + }, + "e": 42 + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/e": [ + { "@list": [ { "@value": 42, "@type": "@json" } ] } + ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} From afedfc4d89731f15559ecb371439438991de2c79 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Fri, 26 Jun 2026 19:25:49 -0300 Subject: [PATCH 08/13] Fix Signed-off-by: Juan Cruz Viotti --- config.cmake.in | 3 ++ src/core/jsonld/jsonld.cc | 28 +++++++++-------- .../jsonld/jsonld_create_term_definition.cc | 6 +++- src/core/jsonld/jsonld_expansion.cc | 11 ++++++- src/core/jsonld/jsonld_value_expansion.cc | 8 +++-- test/jsonld/jsonld_expand_test.cc | 30 +++++++++++++++++++ 6 files changed, 70 insertions(+), 16 deletions(-) diff --git a/config.cmake.in b/config.cmake.in index f192bb6de..889e1c798 100644 --- a/config.cmake.in +++ b/config.cmake.in @@ -138,14 +138,17 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") elseif(component STREQUAL "jsonld") + find_dependency(PCRE2 CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_text.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") + 
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_langtag.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonld.cmake") elseif(component STREQUAL "yaml") diff --git a/src/core/jsonld/jsonld.cc b/src/core/jsonld/jsonld.cc index 0be074652..0ee90af8e 100644 --- a/src/core/jsonld/jsonld.cc +++ b/src/core/jsonld/jsonld.cc @@ -40,6 +40,20 @@ auto run_expansion(ExpansionState &state, ActiveContext &active_context, return expanded; } +// Set up the shared expansion state and initial active context from the public +// entry-point arguments. +auto initialise_expansion(const JSONLDResolver &resolver, + const JSON::StringView base_iri, + const JSONLDVersion version, ExpansionState &state, + ActiveContext &active_context) -> void { + state.resolver = &resolver; + state.processing_1_0 = version == JSONLDVersion::V1_0; + if (!base_iri.empty()) { + active_context.base = JSON::String{base_iri}; + state.document_base = JSON::String{base_iri}; + } +} + } // namespace sourcemeta::core namespace sourcemeta::core { @@ -48,13 +62,8 @@ auto jsonld_expand(const JSON &input, const JSON::StringView base_iri, const JSONLDResolver &resolver, const JSONLDVersion version) -> JSON { ExpansionState state; - state.resolver = &resolver; - state.processing_1_0 = version == JSONLDVersion::V1_0; ActiveContext active_context; - if (!base_iri.empty()) { - active_context.base = JSON::String{base_iri}; - state.document_base = JSON::String{base_iri}; - } + initialise_expansion(resolver, base_iri, version, state, active_context); return run_expansion(state, active_context, input); } @@ -63,13 +72,8 @@ auto jsonld_expand(const JSON &input, const JSON &expand_context, const JSONLDResolver &resolver, const JSONLDVersion version) -> JSON { ExpansionState state; - state.resolver = &resolver; - state.processing_1_0 = version == JSONLDVersion::V1_0; ActiveContext active_context; - if (!base_iri.empty()) { - active_context.base = 
JSON::String{base_iri}; - state.document_base = JSON::String{base_iri}; - } + initialise_expansion(resolver, base_iri, version, state, active_context); const auto &context{ expand_context.is_object() && expand_context.defines(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) diff --git a/src/core/jsonld/jsonld_create_term_definition.cc b/src/core/jsonld/jsonld_create_term_definition.cc index ed1aac1f0..95cefd91e 100644 --- a/src/core/jsonld/jsonld_create_term_definition.cc +++ b/src/core/jsonld/jsonld_create_term_definition.cc @@ -31,7 +31,8 @@ auto same_definition(const TermDefinition &left, const TermDefinition &right) left.has_language == right.has_language && left.direction == right.direction && left.has_direction == right.has_direction && - left.context == right.context && left.index == right.index && + left.context == right.context && + left.context_base == right.context_base && left.index == right.index && left.reverse == right.reverse && left.prefix == right.prefix; } @@ -469,6 +470,7 @@ auto create_term_definition(ExpansionState &state, // the term is never used. Remote scoped contexts (including recursive // ones) are validated lazily when the term is used instead. const bool saved_override{state.protected_override}; + const bool saved_context_protected{state.context_protected}; try { // The error raised here is always discarded below, so its location does // not matter. 
@@ -476,8 +478,10 @@ auto create_term_definition(ExpansionState &state, state.protected_override = true; process_context(state, probe, *context_entry, empty_weak_pointer); state.protected_override = saved_override; + state.context_protected = saved_context_protected; } catch (const JSONLDError &error) { state.protected_override = saved_override; + state.context_protected = saved_context_protected; const JSON::StringView code{error.what()}; if (code != "Loading remote context failed" && code != "Recursive context inclusion" && diff --git a/src/core/jsonld/jsonld_expansion.cc b/src/core/jsonld/jsonld_expansion.cc index e0c113ce1..372dbc4fc 100644 --- a/src/core/jsonld/jsonld_expansion.cc +++ b/src/core/jsonld/jsonld_expansion.cc @@ -320,11 +320,20 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, if (entry.second.is_array()) { expanded_type = JSON::make_array(); for (const auto &item : entry.second.as_array()) { - expanded_type.push_back(expand_type(state, type_context, item)); + // A value that does not expand to an IRI is omitted, so @type stays + // an array of strings. + auto type{expand_type(state, type_context, item)}; + if (!type.is_null()) { + expanded_type.push_back(std::move(type)); + } } } else { expanded_type = expand_type(state, type_context, entry.second); } + // A lone @type value that does not expand to an IRI carries nothing. 
+ if (expanded_type.is_null()) { + continue; + } if (result.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { auto merged{ into_array(std::move(result.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)))}; diff --git a/src/core/jsonld/jsonld_value_expansion.cc b/src/core/jsonld/jsonld_value_expansion.cc index 6c3b9b29e..ed5f3dfcd 100644 --- a/src/core/jsonld/jsonld_value_expansion.cc +++ b/src/core/jsonld/jsonld_value_expansion.cc @@ -25,7 +25,9 @@ auto expand_value(ExpansionState &state, ActiveContext &active_context, true, false, nullptr, nullptr, empty_weak_pointer)}; result.assign_assume_new(JSON::String{KEYWORD_ID}, - JSON{identifier.value_or("")}, KEYWORD_ID_HASH); + identifier.has_value() ? JSON{identifier.value()} + : JSON{nullptr}, + KEYWORD_ID_HASH); return result; } if (definition->type_mapping.value() == KEYWORD_VOCAB) { @@ -34,7 +36,9 @@ auto expand_value(ExpansionState &state, ActiveContext &active_context, true, true, nullptr, nullptr, empty_weak_pointer)}; result.assign_assume_new(JSON::String{KEYWORD_ID}, - JSON{identifier.value_or("")}, KEYWORD_ID_HASH); + identifier.has_value() ? 
JSON{identifier.value()} + : JSON{nullptr}, + KEYWORD_ID_HASH); return result; } } diff --git a/test/jsonld/jsonld_expand_test.cc b/test/jsonld/jsonld_expand_test.cc index 66ad2d53e..d6d840a7b 100644 --- a/test/jsonld/jsonld_expand_test.cc +++ b/test/jsonld/jsonld_expand_test.cc @@ -300,3 +300,33 @@ TEST(JSONLD_expand, json_typed_value_in_list_container) { EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); } + +TEST(JSONLD_expand, id_typed_keyword_form_value_expands_to_null) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "p": { "@id": "http://example.com/p", "@type": "@id" } }, + "p": "@foo" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { "http://example.com/p": [ { "@id": null } ] } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, non_expandable_type_value_is_omitted) { + const auto input = sourcemeta::core::parse_json(R"({ + "@id": "http://example.com/n", + "@type": "@foo", + "http://example.com/p": "v" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "@id": "http://example.com/n", + "http://example.com/p": [ { "@value": "v" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} From ad80922aafeccc995d46ef84268d1f036a2ba234 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Fri, 26 Jun 2026 20:26:55 -0300 Subject: [PATCH 09/13] More Signed-off-by: Juan Cruz Viotti --- src/core/jsonld/jsonld_context_processing.cc | 6 +++++- src/core/jsonld/jsonld_expansion.cc | 11 +++++++---- test/jsonld/jsonld_expand_error_test.cc | 10 ++++++++++ test/jsonld/jsonld_expand_test.cc | 8 ++++++-- 4 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/core/jsonld/jsonld_context_processing.cc b/src/core/jsonld/jsonld_context_processing.cc index 9d7ce91e6..c7174491d 100644 --- a/src/core/jsonld/jsonld_context_processing.cc +++ b/src/core/jsonld/jsonld_context_processing.cc @@ -156,8 +156,12 @@ auto 
process_context(ExpansionState &state, ActiveContext &active_context, {KEYWORD_IMPORT}); } const auto document{(*state.resolver)(reference)}; + if (!document.has_value()) { + throw JSONLDError("Loading remote context failed", location, + {KEYWORD_IMPORT}); + } const auto *imported_context{ - document.has_value() && document->is_object() + document->is_object() ? document->try_at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) : nullptr}; if (imported_context == nullptr || !imported_context->is_object()) { diff --git a/src/core/jsonld/jsonld_expansion.cc b/src/core/jsonld/jsonld_expansion.cc index 372dbc4fc..9d7d9ef3b 100644 --- a/src/core/jsonld/jsonld_expansion.cc +++ b/src/core/jsonld/jsonld_expansion.cc @@ -409,10 +409,13 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, } if (name == KEYWORD_GRAPH) { - merge( - result, KEYWORD_GRAPH, - into_array(expand(state, active_context, JSON::String{KEYWORD_GRAPH}, - entry.second, entry_pointer))); + // @graph expands to an array of node objects, so a value that expands to + // null contributes no element rather than a null one. + auto graph{expand(state, active_context, JSON::String{KEYWORD_GRAPH}, + entry.second, entry_pointer)}; + merge(result, KEYWORD_GRAPH, + graph.is_null() ? 
JSON::make_array() + : into_array(std::move(graph))); continue; } diff --git a/test/jsonld/jsonld_expand_error_test.cc b/test/jsonld/jsonld_expand_error_test.cc index 644de5aca..3a6350d01 100644 --- a/test/jsonld/jsonld_expand_error_test.cc +++ b/test/jsonld/jsonld_expand_error_test.cc @@ -587,3 +587,13 @@ TEST(JSONLD_expand_error, free_floating_invalid_set_or_list_object) { EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), "Invalid set or list object", ""); } + +TEST(JSONLD_expand_error, import_loading_failed) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "@import": "https://example.com/unknown" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", remote_resolver()), + "Loading remote context failed", "/@context/@import"); +} diff --git a/test/jsonld/jsonld_expand_test.cc b/test/jsonld/jsonld_expand_test.cc index d6d840a7b..3112573fc 100644 --- a/test/jsonld/jsonld_expand_test.cc +++ b/test/jsonld/jsonld_expand_test.cc @@ -5,9 +5,7 @@ TEST(JSONLD_expand, empty_object) { const auto input = sourcemeta::core::parse_json("{}"); - const auto expected = sourcemeta::core::parse_json("[]"); - EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); } @@ -330,3 +328,9 @@ TEST(JSONLD_expand, non_expandable_type_value_is_omitted) { EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); } + +TEST(JSONLD_expand, graph_value_expanding_to_null_yields_no_element) { + const auto input = sourcemeta::core::parse_json(R"({ "@graph": "scalar" })"); + const auto expected = sourcemeta::core::parse_json("[]"); + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} From 4f0d38745d73580917147ea7cc01fb7125cd42d7 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Fri, 26 Jun 2026 20:40:32 -0300 Subject: [PATCH 10/13] More Signed-off-by: Juan Cruz Viotti --- src/core/jsonld/jsonld_context_processing.cc | 4 ++- test/jsonld/jsonld_expand_test.cc | 31 ++++++++++++++++++++ 2 files changed, 34 
insertions(+), 1 deletion(-) diff --git a/src/core/jsonld/jsonld_context_processing.cc b/src/core/jsonld/jsonld_context_processing.cc index c7174491d..266454888 100644 --- a/src/core/jsonld/jsonld_context_processing.cc +++ b/src/core/jsonld/jsonld_context_processing.cc @@ -184,7 +184,9 @@ auto process_context(ExpansionState &state, ActiveContext &active_context, } if (const auto *base_entry{ - context.try_at(KEYWORD_BASE, KEYWORD_BASE_HASH)}) { + state.remote_context_chain.empty() + ? context.try_at(KEYWORD_BASE, KEYWORD_BASE_HASH) + : nullptr}) { const auto &base{*base_entry}; if (base.is_null()) { active_context.base = std::nullopt; diff --git a/test/jsonld/jsonld_expand_test.cc b/test/jsonld/jsonld_expand_test.cc index 3112573fc..5970a2eee 100644 --- a/test/jsonld/jsonld_expand_test.cc +++ b/test/jsonld/jsonld_expand_test.cc @@ -3,6 +3,8 @@ #include #include +#include // std::optional, std::nullopt + TEST(JSONLD_expand, empty_object) { const auto input = sourcemeta::core::parse_json("{}"); const auto expected = sourcemeta::core::parse_json("[]"); @@ -334,3 +336,32 @@ TEST(JSONLD_expand, graph_value_expanding_to_null_yields_no_element) { const auto expected = sourcemeta::core::parse_json("[]"); EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); } + +TEST(JSONLD_expand, base_in_remote_context_is_ignored) { + const sourcemeta::core::JSONLDResolver resolver = + [](const sourcemeta::core::JSON::StringView identifier) + -> std::optional { + if (identifier == "https://example.com/remote-base") { + return sourcemeta::core::parse_json( + R"({ "@context": { "@base": "http://remote.example/" } })"); + } + return std::nullopt; + }; + + const auto input = sourcemeta::core::parse_json(R"({ + "@context": "https://example.com/remote-base", + "@id": "relative-node", + "http://example.com/p": "v" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "@id": "http://doc.example/relative-node", + "http://example.com/p": [ { "@value": "v" } ] + } + ])"); 
+ + EXPECT_EQ( + sourcemeta::core::jsonld_expand(input, "http://doc.example/", resolver), + expected); +} From 428c8f67d5c8b10b6fe004443eef80dfc308d917 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Fri, 26 Jun 2026 20:52:13 -0300 Subject: [PATCH 11/13] More Signed-off-by: Juan Cruz Viotti --- src/core/jsonld/jsonld_expansion.cc | 2 +- src/core/jsonld/jsonld_iri_expansion.cc | 8 ++++---- test/jsonld/jsonld_expand_test.cc | 23 +++++++++++++++++++++++ 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/core/jsonld/jsonld_expansion.cc b/src/core/jsonld/jsonld_expansion.cc index 9d7d9ef3b..367093022 100644 --- a/src/core/jsonld/jsonld_expansion.cc +++ b/src/core/jsonld/jsonld_expansion.cc @@ -621,7 +621,7 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, } const auto direction{definition->has_direction ? definition->direction - : active_context.default_direction}; + : value_context.default_direction}; if (direction.has_value()) { value.assign_assume_new(JSON::String{KEYWORD_DIRECTION}, JSON{direction.value()}, diff --git a/src/core/jsonld/jsonld_iri_expansion.cc b/src/core/jsonld/jsonld_iri_expansion.cc index ff6364cd7..736485ad4 100644 --- a/src/core/jsonld/jsonld_iri_expansion.cc +++ b/src/core/jsonld/jsonld_iri_expansion.cc @@ -21,8 +21,8 @@ auto expand_iri(ExpansionState &state, ActiveContext &active_context, return std::nullopt; } - if (local_context != nullptr && local_context->is_object() && - local_context->defines(value)) { + if (local_context != nullptr && defined != nullptr && + local_context->is_object() && local_context->defines(value)) { const auto iterator{defined->find(value)}; if (iterator == defined->cend() || !iterator->second) { create_term_definition(state, active_context, *local_context, value, @@ -49,8 +49,8 @@ auto expand_iri(ExpansionState &state, ActiveContext &active_context, return value; } - if (local_context != nullptr && local_context->is_object() && - local_context->defines(prefix)) { + if 
(local_context != nullptr && defined != nullptr && + local_context->is_object() && local_context->defines(prefix)) { const auto iterator{defined->find(prefix)}; if (iterator == defined->cend() || !iterator->second) { create_term_definition(state, active_context, *local_context, prefix, diff --git a/test/jsonld/jsonld_expand_test.cc b/test/jsonld/jsonld_expand_test.cc index 5970a2eee..8c94913c8 100644 --- a/test/jsonld/jsonld_expand_test.cc +++ b/test/jsonld/jsonld_expand_test.cc @@ -365,3 +365,26 @@ TEST(JSONLD_expand, base_in_remote_context_is_ignored) { sourcemeta::core::jsonld_expand(input, "http://doc.example/", resolver), expected); } + +TEST(JSONLD_expand, language_map_direction_uses_property_scoped_context) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "p": { + "@id": "http://example.com/p", + "@container": "@language", + "@context": { "@direction": "rtl" } + } + }, + "p": { "en": "hello" } + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/p": [ + { "@value": "hello", "@language": "en", "@direction": "rtl" } + ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} From 506f83d58f0e0586617fc9038c0fa6ffa33fc496 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Fri, 26 Jun 2026 21:03:07 -0300 Subject: [PATCH 12/13] More Signed-off-by: Juan Cruz Viotti --- test/jsonld/jsonld_suite.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/jsonld/jsonld_suite.cc b/test/jsonld/jsonld_suite.cc index 055a3ec12..f6ed35f83 100644 --- a/test/jsonld/jsonld_suite.cc +++ b/test/jsonld/jsonld_suite.cc @@ -37,8 +37,10 @@ class JSONLDExpandTest : public testing::Test { if (!identifier.starts_with(test_case.base_prefix)) { return std::nullopt; } + // Dereferencing a context IRI ignores any fragment or query component. 
+ const auto suffix{identifier.substr(test_case.base_prefix.size())}; const auto path{test_case.suite_root / - identifier.substr(test_case.base_prefix.size())}; + suffix.substr(0, suffix.find_first_of("#?"))}; if (!std::filesystem::exists(path)) { return std::nullopt; } From e362a2e19e40d5d495a1b4cce4fc60ebc4f290c4 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Fri, 26 Jun 2026 21:25:07 -0300 Subject: [PATCH 13/13] More Signed-off-by: Juan Cruz Viotti --- .../include/sourcemeta/core/jsonld_error.h | 4 ++-- .../jsonld/jsonld_create_term_definition.cc | 7 ++++++ src/core/jsonld/jsonld_expansion.cc | 6 +++-- test/jsonld/jsonld_expand_error_test.cc | 22 +++++++++++++++++++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/core/jsonld/include/sourcemeta/core/jsonld_error.h b/src/core/jsonld/include/sourcemeta/core/jsonld_error.h index a91536f97..4ba412424 100644 --- a/src/core/jsonld/include/sourcemeta/core/jsonld_error.h +++ b/src/core/jsonld/include/sourcemeta/core/jsonld_error.h @@ -45,7 +45,7 @@ class SOURCEMETA_CORE_JSONLD_EXPORT JSONLDError : public std::exception { } [[nodiscard]] auto what() const noexcept -> const char * override { - return this->code_; + return this->code_.c_str(); } /// Get the JSON Pointer to the position in the input document that caused the @@ -55,7 +55,7 @@ class SOURCEMETA_CORE_JSONLD_EXPORT JSONLDError : public std::exception { } private: - const char *code_; + JSON::String code_; Pointer pointer_; }; diff --git a/src/core/jsonld/jsonld_create_term_definition.cc b/src/core/jsonld/jsonld_create_term_definition.cc index 95cefd91e..43dd48aef 100644 --- a/src/core/jsonld/jsonld_create_term_definition.cc +++ b/src/core/jsonld/jsonld_create_term_definition.cc @@ -354,6 +354,13 @@ auto create_term_definition(ExpansionState &state, throw JSONLDError("Invalid container mapping", term_pointer, {KEYWORD_CONTAINER}); } + // A keyword may not appear more than once in the container array. 
+ for (const auto &seen : definition.container) { + if (seen == item_string) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + } definition.container.push_back(item_string); } } else if (container.is_string()) { diff --git a/src/core/jsonld/jsonld_expansion.cc b/src/core/jsonld/jsonld_expansion.cc index 367093022..a79bbc049 100644 --- a/src/core/jsonld/jsonld_expansion.cc +++ b/src/core/jsonld/jsonld_expansion.cc @@ -507,13 +507,14 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, // inherit an enclosing type-scoped revert. It may, however, set its // own revert when it specifies @propagate: false. scoped_context.previous = nullptr; + const auto saved_override{state.protected_override}; state.protected_override = true; const auto saved_base{state.context_base_override}; state.context_base_override = definition->context_base; process_context(state, scoped_context, definition->context.value(), entry_pointer); state.context_base_override = saved_base; - state.protected_override = false; + state.protected_override = saved_override; } ActiveContext &value_context{scoped ? 
scoped_context : active_context}; @@ -845,10 +846,11 @@ auto expand_entries(ExpansionState &state, ActiveContext &active_context, ActiveContext nested{active_context}; const auto saved_base{state.context_base_override}; state.context_base_override = definition->second.context_base; + const auto saved_override{state.protected_override}; state.protected_override = true; process_context(state, nested, definition->second.context.value(), nest_pointer); - state.protected_override = false; + state.protected_override = saved_override; state.context_base_override = saved_base; nested.previous = nullptr; expand_entries(state, nested, type_context, result, active_property, diff --git a/test/jsonld/jsonld_expand_error_test.cc b/test/jsonld/jsonld_expand_error_test.cc index 3a6350d01..db002f007 100644 --- a/test/jsonld/jsonld_expand_error_test.cc +++ b/test/jsonld/jsonld_expand_error_test.cc @@ -5,6 +5,7 @@ #include #include // std::optional, std::nullopt +#include // std::string #define EXPECT_JSONLD_EXPAND_ERROR(expression, expected_code, \ expected_pointer) \ @@ -597,3 +598,24 @@ TEST(JSONLD_expand_error, import_loading_failed) { sourcemeta::core::jsonld_expand(input, "", remote_resolver()), "Loading remote context failed", "/@context/@import"); } + +TEST(JSONLD_expand_error, duplicate_container_keyword) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "a": { "@id": "http://example.com/a", "@container": [ "@set", "@set" ] } + } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid container mapping", + "/@context/a/@container"); +} + +TEST(JSONLD_expand_error, error_code_value_is_owned) { + std::string code{"A custom error code longer than small string optimization"}; + const sourcemeta::core::JSONLDError error{code.c_str(), + sourcemeta::core::Pointer{}}; + code = std::string{}; + EXPECT_STREQ(error.what(), + "A custom error code longer than small string optimization"); +}