diff --git a/.github/workflows/website-build.yml b/.github/workflows/website-build.yml index 884083204..ccbb87279 100644 --- a/.github/workflows/website-build.yml +++ b/.github/workflows/website-build.yml @@ -40,6 +40,7 @@ jobs: -DSOURCEMETA_CORE_JSON:BOOL=OFF -DSOURCEMETA_CORE_JSONL:BOOL=OFF -DSOURCEMETA_CORE_JSONPOINTER:BOOL=OFF + -DSOURCEMETA_CORE_JSONLD:BOOL=OFF -DSOURCEMETA_CORE_YAML:BOOL=OFF -DSOURCEMETA_CORE_JSONRPC:BOOL=OFF -DSOURCEMETA_CORE_MCP:BOOL=OFF diff --git a/.github/workflows/website-deploy.yml b/.github/workflows/website-deploy.yml index 013b71d0d..8c83c15d2 100644 --- a/.github/workflows/website-deploy.yml +++ b/.github/workflows/website-deploy.yml @@ -50,6 +50,7 @@ jobs: -DSOURCEMETA_CORE_JSON:BOOL=OFF -DSOURCEMETA_CORE_JSONL:BOOL=OFF -DSOURCEMETA_CORE_JSONPOINTER:BOOL=OFF + -DSOURCEMETA_CORE_JSONLD:BOOL=OFF -DSOURCEMETA_CORE_YAML:BOOL=OFF -DSOURCEMETA_CORE_JSONRPC:BOOL=OFF -DSOURCEMETA_CORE_MCP:BOOL=OFF diff --git a/CMakeLists.txt b/CMakeLists.txt index 22f9a9a78..e45099d99 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ option(SOURCEMETA_CORE_URI "Build the Sourcemeta Core URI library" ON) option(SOURCEMETA_CORE_URITEMPLATE "Build the Sourcemeta Core URI Template library" ON) option(SOURCEMETA_CORE_JSON "Build the Sourcemeta Core JSON library" ON) option(SOURCEMETA_CORE_JSONPOINTER "Build the Sourcemeta Core JSON Pointer library" ON) +option(SOURCEMETA_CORE_JSONLD "Build the Sourcemeta Core JSON-LD library" ON) option(SOURCEMETA_CORE_JSONL "Build the Sourcemeta Core JSONL library" ON) option(SOURCEMETA_CORE_YAML "Build the Sourcemeta Core YAML library" ON) option(SOURCEMETA_CORE_JSONRPC "Build the Sourcemeta Core JSON-RPC library" ON) @@ -171,6 +172,10 @@ if(SOURCEMETA_CORE_JSONPOINTER) add_subdirectory(src/core/jsonpointer) endif() +if(SOURCEMETA_CORE_JSONLD) + add_subdirectory(src/core/jsonld) +endif() + if(SOURCEMETA_CORE_GZIP) find_package(LibDeflate REQUIRED) add_subdirectory(src/core/gzip) @@ -338,6 +343,10 @@ 
if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/jsonpointer) endif() + if(SOURCEMETA_CORE_JSONLD) + add_subdirectory(test/jsonld) + endif() + if(SOURCEMETA_CORE_GZIP) add_subdirectory(test/gzip) endif() diff --git a/config.cmake.in b/config.cmake.in index 5867d812e..889e1c798 100644 --- a/config.cmake.in +++ b/config.cmake.in @@ -24,6 +24,7 @@ if(NOT SOURCEMETA_CORE_COMPONENTS) list(APPEND SOURCEMETA_CORE_COMPONENTS json) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonl) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonpointer) + list(APPEND SOURCEMETA_CORE_COMPONENTS jsonld) list(APPEND SOURCEMETA_CORE_COMPONENTS yaml) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonrpc) list(APPEND SOURCEMETA_CORE_COMPONENTS mcp) @@ -136,6 +137,20 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") + elseif(component STREQUAL "jsonld") + find_dependency(PCRE2 CONFIG) + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_text.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_langtag.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonld.cmake") elseif(component STREQUAL "yaml") find_dependency(PCRE2 CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") diff 
--git a/src/core/jsonld/CMakeLists.txt b/src/core/jsonld/CMakeLists.txt new file mode 100644 index 000000000..66152d423 --- /dev/null +++ b/src/core/jsonld/CMakeLists.txt @@ -0,0 +1,21 @@ +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME jsonld + PRIVATE_HEADERS error.h + SOURCES jsonld.cc + jsonld_iri_expansion.cc jsonld_create_term_definition.cc + jsonld_context_processing.cc jsonld_value_expansion.cc jsonld_expansion.cc + jsonld_algorithms.h jsonld_keywords.h) + +if(SOURCEMETA_CORE_INSTALL) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME jsonld) +endif() + +target_link_libraries(sourcemeta_core_jsonld + PUBLIC sourcemeta::core::json) +target_link_libraries(sourcemeta_core_jsonld + PUBLIC sourcemeta::core::jsonpointer) +target_link_libraries(sourcemeta_core_jsonld + PRIVATE sourcemeta::core::uri) +target_link_libraries(sourcemeta_core_jsonld + PRIVATE sourcemeta::core::langtag) +target_link_libraries(sourcemeta_core_jsonld + PRIVATE sourcemeta::core::text) diff --git a/src/core/jsonld/include/sourcemeta/core/jsonld.h b/src/core/jsonld/include/sourcemeta/core/jsonld.h new file mode 100644 index 000000000..c54ec3fdf --- /dev/null +++ b/src/core/jsonld/include/sourcemeta/core/jsonld.h @@ -0,0 +1,88 @@ +#ifndef SOURCEMETA_CORE_JSONLD_H_ +#define SOURCEMETA_CORE_JSONLD_H_ + +#ifndef SOURCEMETA_CORE_JSONLD_EXPORT +#include +#endif + +#include +#include + +#include // std::uint8_t +#include // std::function +#include // std::optional + +/// @defgroup jsonld JSON-LD +/// @brief A JSON-LD 1.1 processor, with support for the JSON-LD 1.0 processing +/// mode. +/// +/// This functionality is included as follows: +/// +/// ```cpp +/// #include +/// ``` + +namespace sourcemeta::core { + +/// @ingroup jsonld +/// A resolver callback for loading remote JSON-LD contexts referenced during +/// expansion. Given an absolute IRI, it returns the referenced document, or no +/// value if it cannot be resolved. 
+using JSONLDResolver = std::function(JSON::StringView)>; + +/// @ingroup jsonld +/// The JSON-LD processing mode +enum class JSONLDVersion : std::uint8_t { V1_0, V1_1 }; + +/// @ingroup jsonld +/// +/// Expand a JSON-LD document into its expanded form, resolving relative +/// references against the given base IRI and loading any remote context through +/// the given resolver. The result is always a JSON array. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// const auto document{sourcemeta::core::parse_json(R"({ +/// "@context": { "name": "https://schema.org/name" }, +/// "name": "Sourcemeta" +/// })")}; +/// const auto expanded{sourcemeta::core::jsonld_expand(document)}; +/// sourcemeta::core::prettify(expanded, std::cout); +/// std::cout << std::endl; +/// ``` +SOURCEMETA_CORE_JSONLD_EXPORT +auto jsonld_expand(const JSON &input, const JSON::StringView base_iri = "", + const JSONLDResolver &resolver = {}, + const JSONLDVersion version = JSONLDVersion::V1_1) -> JSON; + +/// @ingroup jsonld +/// +/// Expand a JSON-LD document, applying the given expansion context before the +/// document's own context, as if it had been prepended to the input. 
For +/// example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// const auto document{sourcemeta::core::parse_json( +/// R"({ "name": "Sourcemeta" })")}; +/// const auto context{sourcemeta::core::parse_json( +/// R"({ "name": "https://schema.org/name" })")}; +/// const auto expanded{sourcemeta::core::jsonld_expand(document, context)}; +/// sourcemeta::core::prettify(expanded, std::cout); +/// std::cout << std::endl; +/// ``` +SOURCEMETA_CORE_JSONLD_EXPORT +auto jsonld_expand(const JSON &input, const JSON &expand_context, + const JSON::StringView base_iri = "", + const JSONLDResolver &resolver = {}, + const JSONLDVersion version = JSONLDVersion::V1_1) -> JSON; + +} // namespace sourcemeta::core + +#endif diff --git a/src/core/jsonld/include/sourcemeta/core/jsonld_error.h b/src/core/jsonld/include/sourcemeta/core/jsonld_error.h new file mode 100644 index 000000000..4ba412424 --- /dev/null +++ b/src/core/jsonld/include/sourcemeta/core/jsonld_error.h @@ -0,0 +1,68 @@ +#ifndef SOURCEMETA_CORE_JSONLD_ERROR_H_ +#define SOURCEMETA_CORE_JSONLD_ERROR_H_ + +#ifndef SOURCEMETA_CORE_JSONLD_EXPORT +#include +#endif + +#include +#include + +#include // std::exception +#include // std::initializer_list +#include // std::move + +namespace sourcemeta::core { + +// Exporting symbols that depends on the standard C++ library is considered +// safe. +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251 4275) +#endif + +/// @ingroup jsonld +/// An error that represents a JSON-LD processing failure. 
The message is one of +/// the error codes defined by the JSON-LD 1.1 API specification, and the +/// pointer locates the offending position in the input document +class SOURCEMETA_CORE_JSONLD_EXPORT JSONLDError : public std::exception { +public: + JSONLDError(const char *code, Pointer pointer) + : code_{code}, pointer_{std::move(pointer)} {} + + // Locate the error at a weak pointer, materialising an owned pointer. + JSONLDError(const char *code, const WeakPointer &pointer) + : code_{code}, pointer_{to_pointer(pointer)} {} + + // Locate the error at a weak pointer extended with the given trailing + // property tokens. + JSONLDError(const char *code, const WeakPointer &pointer, + const std::initializer_list children) + : code_{code}, pointer_{to_pointer(pointer)} { + for (const auto child : children) { + this->pointer_.push_back(JSON::String{child}); + } + } + + [[nodiscard]] auto what() const noexcept -> const char * override { + return this->code_.c_str(); + } + + /// Get the JSON Pointer to the position in the input document that caused the + /// error + [[nodiscard]] auto pointer() const noexcept -> const Pointer & { + return this->pointer_; + } + +private: + JSON::String code_; + Pointer pointer_; +}; + +#if defined(_MSC_VER) +#pragma warning(default : 4251 4275) +#endif + +} // namespace sourcemeta::core + +#endif diff --git a/src/core/jsonld/jsonld.cc b/src/core/jsonld/jsonld.cc new file mode 100644 index 000000000..0ee90af8e --- /dev/null +++ b/src/core/jsonld/jsonld.cc @@ -0,0 +1,88 @@ +#include +#include +#include + +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include // std::nullopt +#include // std::move + +namespace sourcemeta::core { + +// Run the expansion algorithm on a top-level input document and normalise the +// result into the expanded document form (JSON-LD 1.1 API Section 5.1). 
+auto run_expansion(ExpansionState &state, ActiveContext &active_context, + const JSON &input) -> JSON { + auto expanded{ + expand(state, active_context, std::nullopt, input, empty_weak_pointer)}; + + // A top-level map containing only @graph is replaced by its value. + if (expanded.is_object() && expanded.object_size() == 1 && + expanded.defines(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH)) { + expanded = expanded.at(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH); + } + + if (expanded.is_object()) { + if (expanded.empty() || (expanded.object_size() == 1 && + expanded.defines(KEYWORD_ID, KEYWORD_ID_HASH))) { + return JSON::make_array(); + } + auto result{JSON::make_array()}; + result.push_back(std::move(expanded)); + return result; + } + + if (!expanded.is_array()) { + return JSON::make_array(); + } + + return expanded; +} + +// Set up the shared expansion state and initial active context from the public +// entry-point arguments. +auto initialise_expansion(const JSONLDResolver &resolver, + const JSON::StringView base_iri, + const JSONLDVersion version, ExpansionState &state, + ActiveContext &active_context) -> void { + state.resolver = &resolver; + state.processing_1_0 = version == JSONLDVersion::V1_0; + if (!base_iri.empty()) { + active_context.base = JSON::String{base_iri}; + state.document_base = JSON::String{base_iri}; + } +} + +} // namespace sourcemeta::core + +namespace sourcemeta::core { + +auto jsonld_expand(const JSON &input, const JSON::StringView base_iri, + const JSONLDResolver &resolver, const JSONLDVersion version) + -> JSON { + ExpansionState state; + ActiveContext active_context; + initialise_expansion(resolver, base_iri, version, state, active_context); + return run_expansion(state, active_context, input); +} + +auto jsonld_expand(const JSON &input, const JSON &expand_context, + const JSON::StringView base_iri, + const JSONLDResolver &resolver, const JSONLDVersion version) + -> JSON { + ExpansionState state; + ActiveContext active_context; + 
initialise_expansion(resolver, base_iri, version, state, active_context); + const auto &context{ + expand_context.is_object() && + expand_context.defines(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) + ? expand_context.at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) + : expand_context}; + // The external expansion context is not part of the input document, so its + // errors carry an empty pointer. + process_context(state, active_context, context, empty_weak_pointer); + return run_expansion(state, active_context, input); +} + +} // namespace sourcemeta::core diff --git a/src/core/jsonld/jsonld_algorithms.h b/src/core/jsonld/jsonld_algorithms.h new file mode 100644 index 000000000..53df79a3f --- /dev/null +++ b/src/core/jsonld/jsonld_algorithms.h @@ -0,0 +1,107 @@ +#ifndef SOURCEMETA_CORE_JSONLD_ALGORITHMS_H_ +#define SOURCEMETA_CORE_JSONLD_ALGORITHMS_H_ + +#include +#include +#include + +#include // std::less +#include // std::map +#include // std::shared_ptr +#include // std::optional +#include // std::vector + +namespace sourcemeta::core { + +struct TermDefinition { + std::optional iri; + std::optional type_mapping; + std::vector container; + std::optional language; + bool has_language{false}; + std::optional direction; + bool has_direction{false}; + std::optional context; + std::optional context_base; + std::optional index; + bool reverse{false}; + bool prefix{false}; + bool is_protected{false}; +}; + +struct ActiveContext { + std::map> terms; + std::optional base; + std::optional vocabulary; + std::optional default_language; + std::optional default_direction; + std::shared_ptr previous; +}; + +// The mutable state shared by the expansion algorithms for the duration of a +// single top-level expansion. +struct ExpansionState { + // Used to load remote contexts. The chain detects recursive inclusion. 
+ const JSONLDResolver *resolver{nullptr}; + std::vector remote_context_chain; + std::optional document_base; + // When a scoped context is processed after the fact, remote references in it + // resolve against the URL of the document that defined the term. + std::optional context_base_override; + // Protected-term state for the context currently being processed. + bool context_protected{false}; + bool protected_override{false}; + bool processing_1_0{false}; + + // Remote context references resolve against the URL of the document that + // contains them: the current remote context if any, otherwise the input + // document. This is distinct from the active context base, which @base + // mutates. + [[nodiscard]] auto context_resolution_base() const + -> std::optional { + if (!this->remote_context_chain.empty()) { + return this->remote_context_chain.back(); + } + if (this->context_base_override.has_value()) { + return this->context_base_override; + } + return this->document_base; + } +}; + +// Tracks, while a context is being processed, which terms have been fully +// defined (true) versus are still being defined (false, used to detect cycles). 
+using DefinedTerms = std::map; + +// IRI Expansion (JSON-LD 1.1 API Section 5.2) +auto expand_iri(ExpansionState &state, ActiveContext &active_context, + const JSON::String &value, const bool document_relative, + const bool vocabulary, const JSON *const local_context, + DefinedTerms *const defined, const WeakPointer &context_pointer) + -> std::optional; + +// Create Term Definition (JSON-LD 1.1 API Section 5.1.1) +auto create_term_definition(ExpansionState &state, + ActiveContext &active_context, + const JSON &local_context, const JSON::String &term, + DefinedTerms &defined, + const WeakPointer &context_pointer) -> void; + +// Context Processing (JSON-LD 1.1 API Section 5.1) +auto process_context(ExpansionState &state, ActiveContext &active_context, + const JSON &local_context, const WeakPointer &pointer, + const bool propagate = true) -> void; + +// Value Expansion (JSON-LD 1.1 API Section 5.3.3) +auto expand_value(ExpansionState &state, ActiveContext &active_context, + const std::optional &active_property, + const JSON &value) -> JSON; + +// Expansion (JSON-LD 1.1 API Section 5.1.2) +auto expand(ExpansionState &state, ActiveContext &active_context, + const std::optional &active_property, + const JSON &element, const WeakPointer &pointer) -> JSON; + +} // namespace sourcemeta::core + +#endif diff --git a/src/core/jsonld/jsonld_context_processing.cc b/src/core/jsonld/jsonld_context_processing.cc new file mode 100644 index 000000000..266454888 --- /dev/null +++ b/src/core/jsonld/jsonld_context_processing.cc @@ -0,0 +1,278 @@ +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include + +#include // std::make_shared +#include // std::optional +#include // std::move, std::pair +#include // std::vector + +namespace sourcemeta::core { + +// Context Processing (JSON-LD 1.1 API Section 5.1) +auto process_context(ExpansionState &state, ActiveContext &active_context, + const JSON &local_context, const WeakPointer &pointer, + const bool propagate) -> void { + 
std::vector> contexts; + if (local_context.is_array()) { + std::size_t index{0}; + for (const auto &item : local_context.as_array()) { + contexts.emplace_back(&item, pointer.concat(index)); + index += 1; + } + } else { + contexts.emplace_back(&local_context, pointer); + } + + // The @propagate flag is read once from a top-level map context, before the + // contexts are processed (JSON-LD 1.1 API Section 5.1 steps 2 and 3). A + // non-boolean value is reported per entry by the loop below. + bool effective_propagate{propagate}; + if (local_context.is_object()) { + if (const auto *propagate_entry{ + local_context.try_at(KEYWORD_PROPAGATE, KEYWORD_PROPAGATE_HASH)}; + propagate_entry != nullptr && propagate_entry->is_boolean()) { + effective_propagate = propagate_entry->to_boolean(); + } + } + if (!effective_propagate && !active_context.previous) { + auto snapshot{std::make_shared(active_context)}; + snapshot->previous = nullptr; + active_context.previous = snapshot; + } + + for (const auto &[entry_pointer, location] : contexts) { + const auto &context{*entry_pointer}; + if (context.is_null()) { + if (!state.protected_override) { + for (const auto &entry : active_context.terms) { + if (entry.second.is_protected) { + throw JSONLDError("Invalid context nullification", location); + } + } + } + // Nullifying the context resets to the initial context, whose base is + // the document base. 
+ active_context = ActiveContext{}; + active_context.base = state.document_base; + continue; + } + + if (context.is_string()) { + auto reference{context.to_string()}; + const auto resolution_base{state.context_resolution_base()}; + if (resolution_base.has_value()) { + reference = URI::from_iri(reference) + .resolve_from(URI::from_iri(resolution_base.value())) + .recompose(); + } + for (const auto &loaded : state.remote_context_chain) { + if (loaded == reference) { + throw JSONLDError("Recursive context inclusion", location); + } + } + if (state.resolver == nullptr || !*state.resolver) { + throw JSONLDError("Loading remote context failed", location); + } + const auto document{(*state.resolver)(reference)}; + if (!document.has_value()) { + throw JSONLDError("Loading remote context failed", location); + } + const auto *context_entry{ + document->is_object() + ? document->try_at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) + : nullptr}; + if (context_entry == nullptr) { + throw JSONLDError("Invalid remote context", location); + } + state.remote_context_chain.push_back(reference); + try { + // A loaded remote context is processed with the default propagation. + process_context(state, active_context, *context_entry, location); + } catch (...) 
{ + state.remote_context_chain.pop_back(); + throw; + } + state.remote_context_chain.pop_back(); + continue; + } + + if (!context.is_object()) { + throw JSONLDError("Invalid local context", location); + } + + if (const auto *version{ + context.try_at(KEYWORD_VERSION, KEYWORD_VERSION_HASH)}; + version != nullptr && + (!version->is_real() || version->to_real() != 1.1)) { + throw JSONLDError("Invalid @version value", location, {KEYWORD_VERSION}); + } + if (state.processing_1_0 && + context.defines(KEYWORD_VERSION, KEYWORD_VERSION_HASH)) { + throw JSONLDError("Processing mode conflict", location, + {KEYWORD_VERSION}); + } + if (state.processing_1_0 && + (context.defines(KEYWORD_PROPAGATE, KEYWORD_PROPAGATE_HASH) || + context.defines(KEYWORD_IMPORT, KEYWORD_IMPORT_HASH) || + context.defines(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH))) { + throw JSONLDError("Invalid context entry", location); + } + + if (const auto *propagate_entry{ + context.try_at(KEYWORD_PROPAGATE, KEYWORD_PROPAGATE_HASH)}; + propagate_entry != nullptr && !propagate_entry->is_boolean()) { + throw JSONLDError("Invalid @propagate value", location, + {KEYWORD_PROPAGATE}); + } + + // @protected applies to imported terms too, so it is set before @import. 
+ const bool saved_protected{state.context_protected}; + if (const auto *protected_entry{ + context.try_at(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH)}) { + if (!protected_entry->is_boolean()) { + throw JSONLDError("Invalid @protected value", location, + {KEYWORD_PROTECTED}); + } + state.context_protected = protected_entry->to_boolean(); + } + + if (const auto *import_entry{ + context.try_at(KEYWORD_IMPORT, KEYWORD_IMPORT_HASH)}) { + const auto &import{*import_entry}; + if (!import.is_string()) { + throw JSONLDError("Invalid @import value", location, {KEYWORD_IMPORT}); + } + auto reference{import.to_string()}; + const auto resolution_base{state.context_resolution_base()}; + if (resolution_base.has_value()) { + reference = URI::from_iri(reference) + .resolve_from(URI::from_iri(resolution_base.value())) + .recompose(); + } + if (state.resolver == nullptr || !*state.resolver) { + throw JSONLDError("Loading remote context failed", location, + {KEYWORD_IMPORT}); + } + const auto document{(*state.resolver)(reference)}; + if (!document.has_value()) { + throw JSONLDError("Loading remote context failed", location, + {KEYWORD_IMPORT}); + } + const auto *imported_context{ + document->is_object() + ? document->try_at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) + : nullptr}; + if (imported_context == nullptr || !imported_context->is_object()) { + throw JSONLDError("Invalid remote context", location, {KEYWORD_IMPORT}); + } + if (imported_context->defines(KEYWORD_IMPORT, KEYWORD_IMPORT_HASH)) { + throw JSONLDError("Invalid context entry", location, {KEYWORD_IMPORT}); + } + // Merge the imported entries with the current ones, the current ones + // overriding, and process the result as a single context. 
+ auto merged{JSON{*imported_context}}; + for (const auto &entry : context.as_object()) { + if (JSON::StringView{entry.first} != KEYWORD_IMPORT) { + merged.assign(entry.first, entry.second); + } + } + process_context(state, active_context, merged, location, propagate); + state.context_protected = saved_protected; + continue; + } + + if (const auto *base_entry{ + state.remote_context_chain.empty() + ? context.try_at(KEYWORD_BASE, KEYWORD_BASE_HASH) + : nullptr}) { + const auto &base{*base_entry}; + if (base.is_null()) { + active_context.base = std::nullopt; + } else if (!base.is_string()) { + throw JSONLDError("Invalid base IRI", location, {KEYWORD_BASE}); + } else { + const auto &base_string{base.to_string()}; + if (active_context.base.has_value()) { + active_context.base = + URI::from_iri(base_string) + .resolve_from(URI::from_iri(active_context.base.value())) + .recompose(); + } else if (URI::from_iri(base_string).is_absolute()) { + active_context.base = base_string; + } else { + throw JSONLDError("Invalid base IRI", location, {KEYWORD_BASE}); + } + } + } + + if (const auto *vocabulary_entry{ + context.try_at(KEYWORD_VOCAB, KEYWORD_VOCAB_HASH)}) { + const auto &vocabulary{*vocabulary_entry}; + if (vocabulary.is_null()) { + active_context.vocabulary = std::nullopt; + } else if (!vocabulary.is_string()) { + throw JSONLDError("Invalid vocab mapping", location, {KEYWORD_VOCAB}); + } else { + const auto &vocabulary_string{vocabulary.to_string()}; + // In 1.0, @vocab must be an absolute IRI or blank node identifier. 
+ if (state.processing_1_0 && + vocabulary_string.find(':') == JSON::String::npos) { + throw JSONLDError("Invalid vocab mapping", location, {KEYWORD_VOCAB}); + } + active_context.vocabulary = + expand_iri(state, active_context, vocabulary_string, true, true, + nullptr, nullptr, empty_weak_pointer); + } + } + + if (const auto *language_entry{ + context.try_at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH)}) { + const auto &language{*language_entry}; + if (language.is_null()) { + active_context.default_language = std::nullopt; + } else if (!language.is_string()) { + throw JSONLDError("Invalid default language", location, + {KEYWORD_LANGUAGE}); + } else { + active_context.default_language = language.to_string(); + } + } + + if (const auto *direction_entry{ + context.try_at(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH)}) { + const auto &direction{*direction_entry}; + if (direction.is_null()) { + active_context.default_direction = std::nullopt; + } else if (!direction.is_string()) { + throw JSONLDError("Invalid base direction", location, + {KEYWORD_DIRECTION}); + } else { + const auto &direction_string{direction.to_string()}; + if (direction_string != "ltr" && direction_string != "rtl") { + throw JSONLDError("Invalid base direction", location, + {KEYWORD_DIRECTION}); + } + active_context.default_direction = direction_string; + } + } + + DefinedTerms defined; + for (const auto &entry : context.as_object()) { + const auto &name{entry.first}; + if (name == KEYWORD_BASE || name == KEYWORD_VOCAB || + name == KEYWORD_LANGUAGE || name == KEYWORD_VERSION || + name == KEYWORD_DIRECTION || name == KEYWORD_IMPORT || + name == KEYWORD_PROPAGATE || name == KEYWORD_PROTECTED) { + continue; + } + create_term_definition(state, active_context, context, name, defined, + location); + } + + state.context_protected = saved_protected; + } +} + +} // namespace sourcemeta::core diff --git a/src/core/jsonld/jsonld_create_term_definition.cc b/src/core/jsonld/jsonld_create_term_definition.cc new file mode 
100644 index 000000000..43dd48aef --- /dev/null +++ b/src/core/jsonld/jsonld_create_term_definition.cc @@ -0,0 +1,611 @@ +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include + +#include // std::optional +#include // std::move + +namespace sourcemeta::core { + +namespace { + +// Whether the given value is a valid @container value. +auto is_valid_container(const JSON::StringView value) -> bool { + return value == KEYWORD_LIST || value == KEYWORD_SET || + value == KEYWORD_INDEX || value == KEYWORD_LANGUAGE || + value == KEYWORD_ID || value == KEYWORD_TYPE || value == KEYWORD_GRAPH; +} + +// Whether the given value ends with a URI generic delimiter (RFC 3986). +auto ends_with_gen_delim(const JSON::StringView value) -> bool { + return !value.empty() && URI::is_gen_delim(value.back()); +} + +// Whether two definitions are equivalent ignoring their protected status, which +// is what a protected-term redefinition check compares. +auto same_definition(const TermDefinition &left, const TermDefinition &right) + -> bool { + return left.iri == right.iri && left.type_mapping == right.type_mapping && + left.container == right.container && left.language == right.language && + left.has_language == right.has_language && + left.direction == right.direction && + left.has_direction == right.has_direction && + left.context == right.context && + left.context_base == right.context_base && left.index == right.index && + left.reverse == right.reverse && left.prefix == right.prefix; +} + +// Store a freshly-built term definition, enforcing protected-term redefinition. 
+auto finalize_definition(ExpansionState &state, ActiveContext &active_context,
+                         DefinedTerms &defined, const JSON::String &term,
+                         const WeakPointer &term_pointer,
+                         const std::optional<TermDefinition> &previous,
+                         TermDefinition &&candidate) -> void {
+  // A protected term can only be redefined by an identical definition, unless
+  // protection is currently being overridden.
+  if (previous.has_value() && previous->is_protected &&
+      !state.protected_override) {
+    if (!same_definition(previous.value(), candidate)) {
+      throw JSONLDError("Protected term redefinition", term_pointer);
+    }
+    // A redefinition with the same definition retains the protected flag.
+    candidate.is_protected = true;
+  }
+  // Install the definition and mark the term as completely defined.
+  active_context.terms[term] = std::move(candidate);
+  defined[term] = true;
+}
+
+} // namespace
+
+// Create Term Definition (JSON-LD 1.1 API Section 5.1.1)
+//
+// Defines `term` in the active context from its entry in the local context.
+// `defined` tracks progress: an entry is false while the term is being defined
+// (re-entering it then reports a cyclic IRI mapping) and true once it is
+// complete. `context_pointer` locates the local context and is extended with
+// the term to point errors at the offending definition. Any existing
+// definition of the term is removed up front and only consulted afterwards to
+// enforce protection. Throws JSONLDError when the definition is invalid.
+auto create_term_definition(ExpansionState &state,
+                            ActiveContext &active_context,
+                            const JSON &local_context, const JSON::String &term,
+                            DefinedTerms &defined,
+                            const WeakPointer &context_pointer) -> void {
+  const auto status{defined.find(term)};
+  if (status != defined.cend()) {
+    if (status->second) {
+      return;
+    }
+    throw JSONLDError("Cyclic IRI mapping", context_pointer.concat(term));
+  }
+
+  if (term.empty()) {
+    throw JSONLDError("Invalid term definition", context_pointer);
+  }
+
+  defined[term] = false;
+  const auto &value{local_context.at(term)};
+  const WeakPointer term_pointer{context_pointer.concat(term)};
+
+  if (is_keyword(term)) {
+    if (term == KEYWORD_TYPE && value.is_object() && !state.processing_1_0) {
+      TermDefinition type_definition;
+      bool has_container{false};
+      bool invalid_entry{false};
+      for (const auto &entry : value.as_object()) {
+        const auto &name{entry.first};
+        if (name == KEYWORD_PROTECTED) {
+          if (!entry.second.is_boolean()) {
+            throw JSONLDError("Invalid @protected value", term_pointer,
+                              {KEYWORD_PROTECTED});
+          }
+          type_definition.is_protected = entry.second.to_boolean();
+        } else if (name == KEYWORD_CONTAINER && entry.second.is_string()) {
+          const auto &container{entry.second.to_string()};
+          if (container == KEYWORD_SET) {
+            type_definition.container.push_back(container);
+            has_container = true;
+          } else {
+            invalid_entry = true;
+          }
+        } else {
+          invalid_entry = true;
+        }
+      }
+      // A redefinition of a protected @type is rejected before the shape of
+      // the new definition is validated.
+      const auto existing_type{active_context.terms.find(KEYWORD_TYPE)};
+      if (existing_type != active_context.terms.cend() &&
+          existing_type->second.is_protected && !state.protected_override) {
+        if (!same_definition(existing_type->second, type_definition)) {
+          throw JSONLDError("Protected term redefinition", term_pointer);
+        }
+        type_definition.is_protected = true;
+      } else if (invalid_entry || !has_container) {
+        throw JSONLDError("Keyword redefinition", term_pointer);
+      } else if (!type_definition.is_protected) {
+        type_definition.is_protected = state.context_protected;
+      }
+      active_context.terms[JSON::String{KEYWORD_TYPE}] =
+          std::move(type_definition);
+      defined[term] = true;
+      return;
+    }
+    throw JSONLDError("Keyword redefinition", term_pointer);
+  }
+
+  if (has_keyword_form(term)) {
+    defined[term] = true;
+    return;
+  }
+
+  std::optional<TermDefinition> previous;
+  const auto existing{active_context.terms.find(term)};
+  if (existing != active_context.terms.cend()) {
+    previous = existing->second;
+  }
+  active_context.terms.erase(term);
+
+  const auto *id_entry{
+      value.is_object() ? value.try_at(KEYWORD_ID, KEYWORD_ID_HASH) : nullptr};
+  if (value.is_null() || (id_entry != nullptr && id_entry->is_null())) {
+    TermDefinition empty;
+    empty.is_protected = state.context_protected;
+    // @protected is processed before the null @id is handled, so an explicitly
+    // protected term that maps to null stays protected.
+    if (id_entry != nullptr) {
+      if (const auto *protected_entry{
+              value.try_at(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH)}) {
+        if (!protected_entry->is_boolean()) {
+          throw JSONLDError("Invalid @protected value", term_pointer,
+                            {KEYWORD_PROTECTED});
+        }
+        if (state.processing_1_0) {
+          throw JSONLDError("Invalid term definition", term_pointer,
+                            {KEYWORD_PROTECTED});
+        }
+        empty.is_protected = protected_entry->to_boolean();
+      }
+    }
+    finalize_definition(state, active_context, defined, term, term_pointer,
+                        previous, std::move(empty));
+    return;
+  }
+
+  TermDefinition definition;
+  definition.is_protected = state.context_protected;
+  bool simple_term{false};
+
+  if (value.is_string()) {
+    simple_term = true;
+    const auto &string_value{value.to_string()};
+    if (!is_keyword(string_value) && has_keyword_form(string_value)) {
+      defined[term] = true;
+      return;
+    }
+    if (string_value == term) {
+      // A self-referential simple term resolves through the term itself.
+      const auto colon{term.find(':')};
+      if (colon != JSON::String::npos) {
+        const auto prefix{term.substr(0, colon)};
+        const auto suffix{term.substr(colon + 1)};
+        if (prefix != "_" && !suffix.starts_with("//") &&
+            local_context.is_object() && local_context.defines(prefix)) {
+          const auto iterator{defined.find(prefix)};
+          if (iterator == defined.cend() || !iterator->second) {
+            create_term_definition(state, active_context, local_context, prefix,
+                                   defined, context_pointer);
+          }
+        }
+        const auto prefix_definition{active_context.terms.find(prefix)};
+        if (prefix_definition != active_context.terms.cend() &&
+            prefix_definition->second.iri.has_value()) {
+          definition.iri = prefix_definition->second.iri.value() + suffix;
+        } else {
+          definition.iri = term;
+        }
+      } else if (term.find('/') != JSON::String::npos) {
+        definition.iri = expand_iri(state, active_context, term, false, true,
+                                    nullptr, nullptr, empty_weak_pointer);
+      } else if (active_context.vocabulary.has_value()) {
+        definition.iri = active_context.vocabulary.value() + term;
+      }
+    } else {
+      definition.iri =
+          expand_iri(state, active_context, string_value, false, true,
+                     &local_context, &defined, context_pointer);
+      // In 1.1, an IRI-like term must expand to its IRI mapping.
+      if (!state.processing_1_0 && definition.iri.has_value()) {
+        const auto colon_position{term.find(':')};
+        const bool iri_like_colon{colon_position != JSON::String::npos &&
+                                  colon_position != 0 &&
+                                  colon_position + 1 != term.size()};
+        if (iri_like_colon || term.find('/') != JSON::String::npos) {
+          auto probe{active_context};
+          const auto expanded_term{expand_iri(state, probe, term, false, true,
+                                              nullptr, nullptr,
+                                              empty_weak_pointer)};
+          if (expanded_term.has_value() && expanded_term != definition.iri) {
+            throw JSONLDError("Invalid IRI mapping", term_pointer);
+          }
+        }
+      }
+    }
+  } else if (value.is_object()) {
+    const bool has_id{id_entry != nullptr};
+    const JSON *const id{id_entry};
+    if (const auto *reverse_entry{
+            value.try_at(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)}) {
+      if (has_id || value.defines(KEYWORD_NEST, KEYWORD_NEST_HASH)) {
+        throw JSONLDError("Invalid reverse property", term_pointer,
+                          {KEYWORD_REVERSE});
+      }
+      const auto &reverse{*reverse_entry};
+      if (!reverse.is_string()) {
+        throw JSONLDError("Invalid IRI mapping", term_pointer,
+                          {KEYWORD_REVERSE});
+      }
+      definition.reverse = true;
+      definition.iri =
+          expand_iri(state, active_context, reverse.to_string(), false, true,
+                     &local_context, &defined, context_pointer);
+      if (!definition.iri.has_value()) {
+        // A reverse value with the form of a keyword is ignored.
+        defined[term] = true;
+        return;
+      }
+      if (definition.iri.value().find(':') == JSON::String::npos) {
+        throw JSONLDError("Invalid IRI mapping", term_pointer,
+                          {KEYWORD_REVERSE});
+      }
+    } else if (has_id && !id->is_null() &&
+               (!id->is_string() || id->to_string() != term)) {
+      if (!id->is_string()) {
+        throw JSONLDError("Invalid IRI mapping", term_pointer, {KEYWORD_ID});
+      }
+      const auto &id_value{id->to_string()};
+      if (!is_keyword(id_value) && has_keyword_form(id_value)) {
+        defined[term] = true;
+        return;
+      }
+      definition.iri = expand_iri(state, active_context, id_value, false, true,
+                                  &local_context, &defined, context_pointer);
+      const auto &mapping{definition.iri};
+      if (!mapping.has_value() ||
+          (!is_keyword(mapping.value()) &&
+           mapping.value().find(':') == JSON::String::npos &&
+           !active_context.vocabulary.has_value())) {
+        throw JSONLDError("Invalid IRI mapping", term_pointer, {KEYWORD_ID});
+      }
+      if (mapping.has_value() && mapping.value() == KEYWORD_CONTEXT) {
+        throw JSONLDError("Invalid keyword alias", term_pointer, {KEYWORD_ID});
+      }
+      // In 1.1, a term that itself has the form of an IRI (a colon other than
+      // at the edges, or a slash) must expand to its IRI mapping.
+      if (!state.processing_1_0 && mapping.has_value()) {
+        const auto colon_position{term.find(':')};
+        const bool iri_like_colon{colon_position != JSON::String::npos &&
+                                  colon_position != 0 &&
+                                  colon_position + 1 != term.size()};
+        if (iri_like_colon || term.find('/') != JSON::String::npos) {
+          auto probe{active_context};
+          const auto expanded_term{expand_iri(state, probe, term, false, true,
+                                              nullptr, nullptr,
+                                              empty_weak_pointer)};
+          if (expanded_term.has_value() && expanded_term != mapping) {
+            throw JSONLDError("Invalid IRI mapping", term_pointer,
+                              {KEYWORD_ID});
+          }
+        }
+      }
+    } else if (term.find(':') != JSON::String::npos && !term.starts_with(':') &&
+               !term.ends_with(':')) {
+      const auto colon{term.find(':')};
+      const auto prefix{term.substr(0, colon)};
+      const auto suffix{term.substr(colon + 1)};
+      if (prefix != "_" && !suffix.starts_with("//") &&
+          local_context.is_object() && local_context.defines(prefix)) {
+        const auto iterator{defined.find(prefix)};
+        if (iterator == defined.cend() || !iterator->second) {
+          create_term_definition(state, active_context, local_context, prefix,
+                                 defined, context_pointer);
+        }
+      }
+      const auto prefix_definition{active_context.terms.find(prefix)};
+      if (prefix_definition != active_context.terms.cend() &&
+          prefix_definition->second.iri.has_value()) {
+        definition.iri = prefix_definition->second.iri.value() + suffix;
+      } else {
+        definition.iri = term;
+      }
+    } else if (term.find('/') != JSON::String::npos) {
+      definition.iri = expand_iri(state, active_context, term, false, true,
+                                  nullptr, nullptr, empty_weak_pointer);
+    } else if (active_context.vocabulary.has_value()) {
+      definition.iri = active_context.vocabulary.value() + term;
+    }
+
+    if (const auto *type_entry{value.try_at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)}) {
+      const auto &type_value{*type_entry};
+      if (!type_value.is_string()) {
+        throw JSONLDError("Invalid type mapping", term_pointer, {KEYWORD_TYPE});
+      }
+      const auto type{expand_iri(state, active_context, type_value.to_string(),
+                                 false, true, &local_context, &defined,
+                                 context_pointer)};
+      if (!type.has_value() || type.value().starts_with("_:") ||
+          (type.value() != KEYWORD_ID && type.value() != KEYWORD_VOCAB &&
+           type.value() != KEYWORD_JSON && type.value() != KEYWORD_NONE &&
+           type.value().find(':') == JSON::String::npos) ||
+          (state.processing_1_0 &&
+           (type.value() == KEYWORD_JSON || type.value() == KEYWORD_NONE))) {
+        throw JSONLDError("Invalid type mapping", term_pointer, {KEYWORD_TYPE});
+      }
+      definition.type_mapping = type;
+    }
+
+    if (const auto *container_entry{
+            value.try_at(KEYWORD_CONTAINER, KEYWORD_CONTAINER_HASH)}) {
+      const auto &container{*container_entry};
+      if (container.is_array()) {
+        // Array containers are a 1.1 feature.
+        if (state.processing_1_0) {
+          throw JSONLDError("Invalid container mapping", term_pointer,
+                            {KEYWORD_CONTAINER});
+        }
+        for (const auto &item : container.as_array()) {
+          if (!item.is_string()) {
+            throw JSONLDError("Invalid container mapping", term_pointer,
+                              {KEYWORD_CONTAINER});
+          }
+          const auto &item_string{item.to_string()};
+          if (!is_valid_container(item_string)) {
+            throw JSONLDError("Invalid container mapping", term_pointer,
+                              {KEYWORD_CONTAINER});
+          }
+          // A keyword may not appear more than once in the container array.
+          for (const auto &seen : definition.container) {
+            if (seen == item_string) {
+              throw JSONLDError("Invalid container mapping", term_pointer,
+                                {KEYWORD_CONTAINER});
+            }
+          }
+          definition.container.push_back(item_string);
+        }
+      } else if (container.is_string()) {
+        const auto &container_string{container.to_string()};
+        // In 1.0, the @graph, @id and @type containers are not permitted.
+        if (state.processing_1_0 && (container_string == KEYWORD_GRAPH ||
+                                     container_string == KEYWORD_ID ||
+                                     container_string == KEYWORD_TYPE)) {
+          throw JSONLDError("Invalid container mapping", term_pointer,
+                            {KEYWORD_CONTAINER});
+        }
+        if (!is_valid_container(container_string)) {
+          throw JSONLDError("Invalid container mapping", term_pointer,
+                            {KEYWORD_CONTAINER});
+        }
+        definition.container.push_back(container_string);
+      } else {
+        throw JSONLDError("Invalid container mapping", term_pointer,
+                          {KEYWORD_CONTAINER});
+      }
+      if (definition.reverse) {
+        for (const auto &item : definition.container) {
+          if (item != KEYWORD_SET && item != KEYWORD_INDEX) {
+            throw JSONLDError("Invalid reverse property", term_pointer,
+                              {KEYWORD_CONTAINER});
+          }
+        }
+      }
+      bool container_graph{false};
+      bool container_id{false};
+      bool container_index{false};
+      bool container_language{false};
+      bool container_list{false};
+      bool container_set{false};
+      bool container_type{false};
+      for (const auto &item : definition.container) {
+        if (item == KEYWORD_GRAPH) {
+          container_graph = true;
+        } else if (item == KEYWORD_ID) {
+          container_id = true;
+        } else if (item == KEYWORD_INDEX) {
+          container_index = true;
+        } else if (item == KEYWORD_LANGUAGE) {
+          container_language = true;
+        } else if (item == KEYWORD_LIST) {
+          container_list = true;
+        } else if (item == KEYWORD_SET) {
+          container_set = true;
+        } else if (item == KEYWORD_TYPE) {
+          container_type = true;
+        }
+      }
+      // Valid array combinations (JSON-LD 1.1 API Section 5.1.1 step 19.1): a
+      // single keyword, or @graph with exactly one of @id or @index optionally
+      // with @set, or @set combined with any of @index, @graph, @id, @type, or
+      // @language.
+      if (definition.container.size() != 1) {
+        const bool graph_form{
+            container_graph && (container_id != container_index) &&
+            !container_list && !container_type && !container_language};
+        const bool set_form{container_set && !container_list};
+        if (!graph_form && !set_form) {
+          throw JSONLDError("Invalid container mapping", term_pointer,
+                            {KEYWORD_CONTAINER});
+        }
+      }
+      // A type-map container may only coerce its keys to identifiers.
+      if (container_type && definition.type_mapping.has_value() &&
+          definition.type_mapping.value() != KEYWORD_ID &&
+          definition.type_mapping.value() != KEYWORD_VOCAB) {
+        throw JSONLDError("Invalid type mapping", term_pointer, {KEYWORD_TYPE});
+      }
+    }
+
+    if (const auto *language_entry{
+            value.try_at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH)};
+        language_entry != nullptr &&
+        !value.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) {
+      const auto &language{*language_entry};
+      if (!language.is_null() && !language.is_string()) {
+        throw JSONLDError("Invalid language mapping", term_pointer,
+                          {KEYWORD_LANGUAGE});
+      }
+      definition.has_language = true;
+      if (language.is_string()) {
+        definition.language = language.to_string();
+      }
+    }
+
+    if (const auto *direction_entry{
+            value.try_at(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH)};
+        direction_entry != nullptr &&
+        !value.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) {
+      const auto &direction{*direction_entry};
+      if (!direction.is_null() &&
+          (!direction.is_string() || (direction.to_string() != "ltr" &&
+                                      direction.to_string() != "rtl"))) {
+        throw JSONLDError("Invalid base direction", term_pointer,
+                          {KEYWORD_DIRECTION});
+      }
+      definition.has_direction = true;
+      if (direction.is_string()) {
+        definition.direction = direction.to_string();
+      }
+    }
+
+    if (const auto *context_entry{
+            value.try_at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH)}) {
+      if (state.processing_1_0) {
+        throw JSONLDError("Invalid term definition", term_pointer,
+                          {KEYWORD_CONTEXT});
+      }
+      // Validate the scoped context eagerly so that errors surface even when
+      // the term is never used. Remote scoped contexts (including recursive
+      // ones) are validated lazily when the term is used instead.
+      const bool saved_override{state.protected_override};
+      const bool saved_context_protected{state.context_protected};
+      try {
+        // The error raised here is always discarded below, so its location does
+        // not matter.
+        ActiveContext probe{active_context};
+        state.protected_override = true;
+        process_context(state, probe, *context_entry, empty_weak_pointer);
+        state.protected_override = saved_override;
+        state.context_protected = saved_context_protected;
+      } catch (const JSONLDError &error) {
+        state.protected_override = saved_override;
+        state.context_protected = saved_context_protected;
+        const JSON::StringView code{error.what()};
+        if (code != "Loading remote context failed" &&
+            code != "Recursive context inclusion" &&
+            code != "Invalid remote context") {
+          throw JSONLDError("Invalid scoped context", term_pointer,
+                            {KEYWORD_CONTEXT});
+        }
+      } catch (...) {
+        // Any other failure propagates unchanged, but the probe must not leave
+        // the overridden flags behind for whoever handles it.
+        state.protected_override = saved_override;
+        state.context_protected = saved_context_protected;
+        throw;
+      }
+      definition.context = *context_entry;
+      definition.context_base = state.context_resolution_base();
+    }
+
+    if (const auto *prefix_entry{
+            value.try_at(KEYWORD_PREFIX, KEYWORD_PREFIX_HASH)}) {
+      if (state.processing_1_0 || term.find(':') != JSON::String::npos ||
+          term.find('/') != JSON::String::npos) {
+        throw JSONLDError("Invalid term definition", term_pointer,
+                          {KEYWORD_PREFIX});
+      }
+      if (!prefix_entry->is_boolean()) {
+        throw JSONLDError("Invalid @prefix value", term_pointer,
+                          {KEYWORD_PREFIX});
+      }
+      definition.prefix = prefix_entry->to_boolean();
+      if (definition.prefix && definition.iri.has_value() &&
+          is_keyword(definition.iri.value())) {
+        throw JSONLDError("Invalid term definition", term_pointer,
+                          {KEYWORD_PREFIX});
+      }
+    }
+
+    if (const auto *nest_entry{value.try_at(KEYWORD_NEST, KEYWORD_NEST_HASH)}) {
+      if (state.processing_1_0) {
+        throw JSONLDError("Invalid term definition", term_pointer,
+                          {KEYWORD_NEST});
+      }
+      const auto &nest{*nest_entry};
+      if (!nest.is_string()) {
+        throw JSONLDError("Invalid @nest value", term_pointer, {KEYWORD_NEST});
+      }
+      const auto &nest_string{nest.to_string()};
+      if (is_keyword(nest_string) && nest_string != KEYWORD_NEST) {
+        throw JSONLDError("Invalid @nest value", term_pointer, {KEYWORD_NEST});
+      }
+    }
+
+    if (const auto *index_entry{
+            value.try_at(KEYWORD_INDEX, KEYWORD_INDEX_HASH)}) {
+      if (state.processing_1_0) {
+        throw JSONLDError("Invalid term definition", term_pointer,
+                          {KEYWORD_INDEX});
+      }
+      bool has_index_container{false};
+      for (const auto &item : definition.container) {
+        if (item == KEYWORD_INDEX) {
+          has_index_container = true;
+        }
+      }
+      const auto &index{*index_entry};
+      if (!index.is_string() || !has_index_container) {
+        throw JSONLDError("Invalid term definition", term_pointer,
+                          {KEYWORD_INDEX});
+      }
+      const auto &index_string{index.to_string()};
+      const auto index_iri{expand_iri(state, active_context, index_string,
+                                      false, true, &local_context, &defined,
+                                      context_pointer)};
+      if (!index_iri.has_value() || is_keyword(index_iri.value())) {
+        throw JSONLDError("Invalid term definition", term_pointer,
+                          {KEYWORD_INDEX});
+      }
+      definition.index = index_string;
+    }
+
+    if (const auto *protected_entry{
+            value.try_at(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH)}) {
+      if (!protected_entry->is_boolean()) {
+        throw JSONLDError("Invalid @protected value", term_pointer,
+                          {KEYWORD_PROTECTED});
+      }
+      if (state.processing_1_0) {
+        throw JSONLDError("Invalid term definition", term_pointer,
+                          {KEYWORD_PROTECTED});
+      }
+      definition.is_protected = protected_entry->to_boolean();
+    }
+
+    // A term definition may not contain any entry other than the keywords
+    // recognised above.
+    for (const auto &entry : value.as_object()) {
+      const JSON::StringView key{entry.first};
+      if (key != KEYWORD_ID && key != KEYWORD_REVERSE &&
+          key != KEYWORD_CONTAINER && key != KEYWORD_CONTEXT &&
+          key != KEYWORD_DIRECTION && key != KEYWORD_INDEX &&
+          key != KEYWORD_LANGUAGE && key != KEYWORD_NEST &&
+          key != KEYWORD_PREFIX && key != KEYWORD_PROTECTED &&
+          key != KEYWORD_TYPE) {
+        throw JSONLDError("Invalid term definition", term_pointer);
+      }
+    }
+  } else {
+    throw JSONLDError("Invalid term definition", term_pointer);
+  }
+
+  if (simple_term && term.find(':') == JSON::String::npos &&
+      term.find('/') == JSON::String::npos && definition.iri.has_value() &&
+      (ends_with_gen_delim(definition.iri.value()) ||
+       definition.iri.value().starts_with("_:"))) {
+    definition.prefix = true;
+  }
+
+  if (!definition.reverse && !definition.iri.has_value()) {
+    throw JSONLDError("Invalid IRI mapping", term_pointer);
+  }
+
+  finalize_definition(state, active_context, defined, term, term_pointer,
+                      previous, std::move(definition));
+}
+
+} // namespace sourcemeta::core
diff --git a/src/core/jsonld/jsonld_expansion.cc b/src/core/jsonld/jsonld_expansion.cc
new file mode 100644
index 000000000..a79bbc049
--- /dev/null
+++ b/src/core/jsonld/jsonld_expansion.cc
@@ -0,0 +1,960 @@
+#include "jsonld_algorithms.h"
+#include "jsonld_keywords.h"
+
+#include <algorithm> // std::ranges::sort
+#include <cstddef>   // std::size_t
+#include <optional>  // std::optional
+#include <utility>   // std::move, std::pair
+#include <vector>    // std::vector
+
+namespace sourcemeta::core {
+
+namespace {
+
+// Return the value itself when it already is an array, and otherwise a new
+// array holding it as its only element. The value is consumed either way.
+auto into_array(JSON &&value) -> JSON {
+  if (value.is_array()) {
+    return std::move(value);
+  }
+  auto result{JSON::make_array()};
+  result.push_back(std::move(value));
+  return result;
+}
+
+// The entries of an object in sorted key order, which is the order expansion
+// uses so that values merged from several keys are deterministic. The keys and
+// values are referenced from the object (which must outlive the result), never
+// copied.
+auto sorted_entries(const JSON &object)
+    -> std::vector<std::pair<const JSON::String *, const JSON *>> {
+  std::vector<std::pair<const JSON::String *, const JSON *>> entries;
+  entries.reserve(object.object_size());
+  for (const auto &entry : object.as_object()) {
+    entries.emplace_back(&entry.first, &entry.second);
+  }
+  std::ranges::sort(entries, [](const auto &left, const auto &right) -> bool {
+    return *left.first < *right.first;
+  });
+  return entries;
+}
+
+// Append the values, which must be an array, into the array stored at the given
+// key, creating it if absent.
+auto merge(JSON &object, const JSON::StringView name, JSON &&values) -> void {
+  if (object.defines(name)) {
+    // Resolve the destination once, and move each element out of `values`
+    // (received as an rvalue) instead of copying it.
+    auto &destination{object.at(name)};
+    for (auto &item : values.as_array()) {
+      destination.push_back(std::move(item));
+    }
+  } else {
+    object.assign(name, std::move(values));
+  }
+}
+
+// Whether the container mapping of the term definition lists the given
+// keyword. A null definition has no container mapping.
+auto container_includes(const TermDefinition *const definition,
+                        const JSON::StringView name) -> bool {
+  if (definition == nullptr) {
+    return false;
+  }
+  return std::ranges::any_of(
+      definition->container,
+      [name](const auto &entry) -> bool { return entry == name; });
+}
+
+// Expand a single @type value against the context that preceded type-scoped
+// processing.
+auto expand_type(ExpansionState &state, const ActiveContext &type_context,
+                 const JSON &item) -> JSON {
+  // Expansion takes a mutable context, so work on a copy of the read-only one.
+  auto context{type_context};
+  const auto type{expand_iri(state, context, item.to_string(), true, true,
+                             nullptr, nullptr, empty_weak_pointer)};
+  return type.has_value() ? JSON{type.value()} : JSON{nullptr};
+}
+
+// Expand the direct (and deferred @nest) entries of a map into the result,
+// mutating it in place. Mutually recursive with expand_object.
+auto expand_entries(ExpansionState &state, ActiveContext &active_context,
+                    const ActiveContext &type_context, JSON &result,
+                    const std::optional<JSON::String> &active_property,
+                    const JSON &source, const WeakPointer &source_pointer)
+    -> void;
+
+// Expand a map element: the node-object (and value-object) branch of the
+// Expansion algorithm, factored out of expand() below.
+// The active context is taken by value, so the type-scoped contexts applied
+// here stay local to this element. Returns null when the element expands to
+// nothing: a value object whose @value is null (unless typed @json), an
+// incomplete value object, or a free-floating value outside any property.
+auto expand_object(ExpansionState &state, ActiveContext active_context,
+                   const std::optional<JSON::String> &active_property,
+                   const JSON &element, const WeakPointer &pointer) -> JSON {
+  auto result{JSON::make_object()};
+
+  // @type values are expanded against the context before type-scoped contexts
+  // are applied.
+  const ActiveContext type_context{active_context};
+
+  // Type-scoped contexts (JSON-LD 1.1 API Section 5.1.2 step 11). The values
+  // are referenced from the input element, never copied.
+  // NOTE(review): the element type was missing from this copy of the patch and
+  // is reconstructed as a string view from the comment above - confirm it
+  // against the upstream file.
+  std::vector<JSON::StringView> type_values;
+  for (const auto &entry : element.as_object()) {
+    const auto expanded{expand_iri(state, active_context, entry.first, false,
+                                   true, nullptr, nullptr, empty_weak_pointer)};
+    if (!expanded.has_value() || expanded.value() != KEYWORD_TYPE) {
+      continue;
+    }
+    if (entry.second.is_array()) {
+      for (const auto &item : entry.second.as_array()) {
+        if (item.is_string()) {
+          type_values.push_back(item.to_string());
+        }
+      }
+    } else if (entry.second.is_string()) {
+      type_values.push_back(entry.second.to_string());
+    }
+  }
+  std::ranges::sort(type_values);
+  for (const auto &type : type_values) {
+    // Each type-scoped context is resolved against the context that preceded
+    // type-scoped processing, so one type's context cannot hide another's.
+    const auto definition{type_context.terms.find(type)};
+    if (definition != type_context.terms.cend() &&
+        definition->second.context.has_value()) {
+      const auto &scoped{definition->second.context.value()};
+      const auto saved_base{state.context_base_override};
+      state.context_base_override = definition->second.context_base;
+      process_context(state, active_context, scoped, pointer, false);
+      state.context_base_override = saved_base;
+    }
+  }
+
+  expand_entries(state, active_context, type_context, result, active_property,
+                 element, pointer);
+
+  // An empty reverse map carries no information.
+  if (const auto *reverse{result.try_at(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)};
+      reverse != nullptr && reverse->empty()) {
+    result.erase(KEYWORD_REVERSE);
+  }
+
+  // Post-processing (JSON-LD 1.1 API Section 5.1.2)
+  if (const auto *value_entry{
+          result.try_at(KEYWORD_VALUE, KEYWORD_VALUE_HASH)}) {
+    const JSON *const type{result.try_at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)};
+    const bool has_type{type != nullptr};
+    const JSON::String *const type_string{
+        type != nullptr && type->is_string() ? &type->to_string() : nullptr};
+    const bool is_json{type_string != nullptr && *type_string == KEYWORD_JSON};
+    for (const auto &entry : result.as_object()) {
+      const auto &name{entry.first};
+      if (name != KEYWORD_VALUE && name != KEYWORD_TYPE &&
+          name != KEYWORD_LANGUAGE && name != KEYWORD_INDEX &&
+          name != KEYWORD_DIRECTION) {
+        throw JSONLDError("Invalid value object", pointer);
+      }
+      if ((name == KEYWORD_LANGUAGE || name == KEYWORD_DIRECTION) && has_type) {
+        throw JSONLDError("Invalid value object", pointer);
+      }
+    }
+    const auto &content{*value_entry};
+    if (content.is_null() && !is_json) {
+      return JSON{nullptr};
+    }
+    if (result.defines(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) &&
+        !content.is_string()) {
+      throw JSONLDError("Invalid language-tagged value", pointer);
+    }
+    if (has_type && (type_string == nullptr || type_string->starts_with("_:") ||
+                     type_string->find(' ') != JSON::String::npos)) {
+      throw JSONLDError("Invalid typed value", pointer);
+    }
+    if (!is_json && !content.is_string() && !content.is_number() &&
+        !content.is_boolean()) {
+      throw JSONLDError("Invalid value object value", pointer);
+    }
+  } else if (const auto *type_entry{
+                 result.try_at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)};
+             type_entry != nullptr && !type_entry->is_array()) {
+    // Node objects always carry @type as an array.
+    result.assign(KEYWORD_TYPE, into_array(JSON{*type_entry}));
+  }
+
+  // A set or list object may only carry an @index entry besides, and this is
+  // validated before any value is dropped.
+  if (result.defines(KEYWORD_LIST, KEYWORD_LIST_HASH) ||
+      result.defines(KEYWORD_SET, KEYWORD_SET_HASH)) {
+    for (const auto &entry : result.as_object()) {
+      const auto &name{entry.first};
+      if (name != KEYWORD_LIST && name != KEYWORD_SET &&
+          name != KEYWORD_INDEX) {
+        throw JSONLDError("Invalid set or list object", pointer);
+      }
+    }
+  }
+
+  // A bare @set collapses to its array. The result is a local about to be
+  // discarded, so the array is moved out of it rather than deep-copied.
+  if (result.defines(KEYWORD_SET, KEYWORD_SET_HASH)) {
+    return std::move(result.at(KEYWORD_SET, KEYWORD_SET_HASH));
+  }
+
+  // Drop an incomplete value object that has a language or direction but no
+  // value.
+  if (!result.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) &&
+      (result.defines(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) ||
+       result.defines(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH))) {
+    bool only_value_keys{true};
+    for (const auto &entry : result.as_object()) {
+      const auto &name{entry.first};
+      if (name != KEYWORD_LANGUAGE && name != KEYWORD_DIRECTION &&
+          name != KEYWORD_INDEX) {
+        only_value_keys = false;
+      }
+    }
+    if (only_value_keys) {
+      return JSON{nullptr};
+    }
+  }
+
+  // Drop free-floating values when not under a property.
+  if (!active_property.has_value() ||
+      active_property.value() == KEYWORD_GRAPH) {
+    if (result.empty() || result.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) ||
+        result.defines(KEYWORD_LIST, KEYWORD_LIST_HASH) ||
+        (result.object_size() == 1 &&
+         result.defines(KEYWORD_ID, KEYWORD_ID_HASH))) {
+      return JSON{nullptr};
+    }
+  }
+
+  return result;
+}
+
+auto expand_entries(ExpansionState &state, ActiveContext &active_context,
+                    const ActiveContext &type_context, JSON &result,
+                    const std::optional<JSON::String> &active_property,
+                    const JSON &source, const WeakPointer &source_pointer)
+    -> void {
+  // @nest entries are deferred and processed after the direct ones.
The + // property is referenced from the source object, never copied. + std::vector> nests; + for (const auto &[key_pointer, value_pointer] : sorted_entries(source)) { + const std::pair entry{*key_pointer, + *value_pointer}; + const JSON::String &property{entry.first}; + const WeakPointer entry_pointer{source_pointer.concat(property)}; + if (property == KEYWORD_CONTEXT) { + continue; + } + + const auto expanded_property{expand_iri(state, active_context, property, + false, true, nullptr, nullptr, + empty_weak_pointer)}; + + if (expanded_property.has_value() && + expanded_property.value() == KEYWORD_NEST) { + if (entry.second.is_array()) { + for (const auto &nest_value : entry.second.as_array()) { + if (!nest_value.is_object() || + nest_value.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + throw JSONLDError("Invalid @nest value", entry_pointer); + } + nests.emplace_back(&property, &nest_value); + } + } else if (entry.second.is_object() && + !entry.second.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + nests.emplace_back(&property, &entry.second); + } else { + throw JSONLDError("Invalid @nest value", entry_pointer); + } + continue; + } + if (!expanded_property.has_value()) { + continue; + } + + const auto &name{expanded_property.value()}; + if (name.find(':') == JSON::String::npos && !is_keyword(name)) { + continue; + } + + if (is_keyword(name) && active_property.has_value() && + active_property.value() == KEYWORD_REVERSE) { + throw JSONLDError("Invalid reverse property map", entry_pointer); + } + + // The @type and @included exemption from colliding keywords does not apply + // in json-ld-1.0. 
+ if (is_keyword(name) && result.defines(name) && + (state.processing_1_0 || + (name != KEYWORD_TYPE && name != KEYWORD_INCLUDED))) { + throw JSONLDError("Colliding keywords", entry_pointer); + } + + if (name == KEYWORD_ID) { + if (!entry.second.is_string()) { + throw JSONLDError("Invalid @id value", entry_pointer); + } + const auto identifier{expand_iri(state, active_context, + entry.second.to_string(), true, false, + nullptr, nullptr, empty_weak_pointer)}; + if (identifier.has_value()) { + result.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{identifier.value()}, KEYWORD_ID_HASH); + } else { + result.assign_assume_new(JSON::String{KEYWORD_ID}, JSON{nullptr}, + KEYWORD_ID_HASH); + } + continue; + } + + if (name == KEYWORD_TYPE) { + if (entry.second.is_array()) { + for (const auto &item : entry.second.as_array()) { + if (!item.is_string()) { + throw JSONLDError("Invalid type value", entry_pointer); + } + } + } else if (!entry.second.is_string()) { + throw JSONLDError("Invalid type value", entry_pointer); + } + // Expand each value, preserving whether the input was a string or an + // array. The node-object post-processing later turns a lone string into + // an array, but a value object keeps its @type as a string. + JSON expanded_type{nullptr}; + if (entry.second.is_array()) { + expanded_type = JSON::make_array(); + for (const auto &item : entry.second.as_array()) { + // A value that does not expand to an IRI is omitted, so @type stays + // an array of strings. + auto type{expand_type(state, type_context, item)}; + if (!type.is_null()) { + expanded_type.push_back(std::move(type)); + } + } + } else { + expanded_type = expand_type(state, type_context, entry.second); + } + // A lone @type value that does not expand to an IRI carries nothing. 
+ if (expanded_type.is_null()) { + continue; + } + if (result.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + auto merged{ + into_array(std::move(result.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)))}; + auto expanded_type_array{into_array(std::move(expanded_type))}; + for (auto &item : expanded_type_array.as_array()) { + merged.push_back(item); + } + result.assign(KEYWORD_TYPE, std::move(merged)); + } else { + result.assign_assume_new(JSON::String{KEYWORD_TYPE}, + std::move(expanded_type), KEYWORD_TYPE_HASH); + } + continue; + } + + if (name == KEYWORD_VALUE) { + result.assign_assume_new(JSON::String{KEYWORD_VALUE}, JSON{entry.second}, + KEYWORD_VALUE_HASH); + continue; + } + + if (name == KEYWORD_LANGUAGE) { + if (!entry.second.is_string()) { + throw JSONLDError("Invalid language-tagged string", entry_pointer); + } + result.assign_assume_new(JSON::String{KEYWORD_LANGUAGE}, + JSON{entry.second}, KEYWORD_LANGUAGE_HASH); + continue; + } + + if (name == KEYWORD_DIRECTION) { + if (state.processing_1_0) { + continue; + } + if (!entry.second.is_string() || (entry.second.to_string() != "ltr" && + entry.second.to_string() != "rtl")) { + throw JSONLDError("Invalid base direction", entry_pointer); + } + result.assign_assume_new(JSON::String{KEYWORD_DIRECTION}, + JSON{entry.second}, KEYWORD_DIRECTION_HASH); + continue; + } + + if (name == KEYWORD_LIST || name == KEYWORD_SET) { + auto elements{JSON::make_array()}; + const auto values{into_array(JSON{entry.second})}; + std::size_t value_index{0}; + for (const auto &item : values.as_array()) { + const WeakPointer item_pointer{entry.second.is_array() + ? 
entry_pointer.concat(value_index) + : entry_pointer}; + auto expanded_item{ + expand(state, active_context, active_property, item, item_pointer)}; + if (expanded_item.is_array()) { + for (auto &nested : expanded_item.as_array()) { + elements.push_back(nested); + } + } else if (!expanded_item.is_null()) { + elements.push_back(std::move(expanded_item)); + } + value_index += 1; + } + if (name == KEYWORD_LIST && state.processing_1_0) { + for (const auto &item : elements.as_array()) { + if (item.is_object() && + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + throw JSONLDError("List of lists", entry_pointer); + } + } + } + result.assign(name, std::move(elements)); + continue; + } + + if (name == KEYWORD_GRAPH) { + // @graph expands to an array of node objects, so a value that expands to + // null contributes no element rather than a null one. + auto graph{expand(state, active_context, JSON::String{KEYWORD_GRAPH}, + entry.second, entry_pointer)}; + merge(result, KEYWORD_GRAPH, + graph.is_null() ? 
JSON::make_array() + : into_array(std::move(graph))); + continue; + } + + if (name == KEYWORD_INCLUDED) { + if (state.processing_1_0) { + continue; + } + auto included{into_array(expand(state, active_context, std::nullopt, + entry.second, entry_pointer))}; + for (const auto &item : included.as_array()) { + if (!item.is_object() || + item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH) || + item.defines(KEYWORD_SET, KEYWORD_SET_HASH)) { + throw JSONLDError("Invalid @included value", entry_pointer); + } + } + merge(result, KEYWORD_INCLUDED, std::move(included)); + continue; + } + + if (name == KEYWORD_INDEX) { + if (!entry.second.is_string()) { + throw JSONLDError("Invalid @index value", entry_pointer); + } + result.assign_assume_new(JSON::String{KEYWORD_INDEX}, JSON{entry.second}, + KEYWORD_INDEX_HASH); + continue; + } + + if (name == KEYWORD_REVERSE) { + if (!entry.second.is_object()) { + throw JSONLDError("Invalid @reverse value", entry_pointer); + } + auto reversed{expand(state, active_context, JSON::String{KEYWORD_REVERSE}, + entry.second, entry_pointer)}; + if (reversed.is_object()) { + const auto *existing_reverse{ + result.try_at(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)}; + auto reverse_map{existing_reverse != nullptr ? 
*existing_reverse + : JSON::make_object()}; + for (const auto &reverse_entry : reversed.as_object()) { + const auto &reverse_property{reverse_entry.first}; + if (reverse_entry.hash == KEYWORD_REVERSE_HASH) { + for (auto &forward : reverse_entry.second.as_object()) { + merge(result, JSON::StringView{forward.first}, + into_array(JSON{forward.second})); + } + } else if (is_keyword(reverse_property, reverse_entry.hash)) { + throw JSONLDError("Invalid reverse property map", entry_pointer); + } else { + const auto reverse_values{into_array(JSON{reverse_entry.second})}; + for (const auto &item : reverse_values.as_array()) { + if (item.is_object() && + (item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH))) { + throw JSONLDError("Invalid reverse property value", + entry_pointer); + } + } + merge(reverse_map, reverse_property, + into_array(JSON{reverse_entry.second})); + } + } + result.assign(KEYWORD_REVERSE, std::move(reverse_map)); + } + continue; + } + + if (is_keyword(name)) { + // Keywords with no property-level handler (such as @vocab or @none + // reached through an alias) carry no expanded value, so the Expansion + // algorithm adds nothing to the result for them. + continue; + } + + const TermDefinition *definition{nullptr}; + const auto term{active_context.terms.find(property)}; + if (term != active_context.terms.cend()) { + definition = &term->second; + } + + // Property-scoped context (JSON-LD 1.1 API Section 5.1.2 step 13.3) + ActiveContext scoped_context; + const bool scoped{definition != nullptr && definition->context.has_value()}; + if (scoped) { + scoped_context = active_context; + // A property-scoped context propagates by default, so it does not + // inherit an enclosing type-scoped revert. It may, however, set its + // own revert when it specifies @propagate: false. 
+ scoped_context.previous = nullptr; + const auto saved_override{state.protected_override}; + state.protected_override = true; + const auto saved_base{state.context_base_override}; + state.context_base_override = definition->context_base; + process_context(state, scoped_context, definition->context.value(), + entry_pointer); + state.context_base_override = saved_base; + state.protected_override = saved_override; + } + ActiveContext &value_context{scoped ? scoped_context : active_context}; + + JSON expanded_value{nullptr}; + if (definition != nullptr && definition->type_mapping.has_value() && + definition->type_mapping.value() == KEYWORD_JSON) { + // A term coerced to @json keeps its value verbatim. + auto json_value{JSON::make_object()}; + json_value.assign_assume_new(JSON::String{KEYWORD_VALUE}, + JSON{entry.second}, KEYWORD_VALUE_HASH); + json_value.assign_assume_new(JSON::String{KEYWORD_TYPE}, + JSON{KEYWORD_JSON}, KEYWORD_TYPE_HASH); + expanded_value = std::move(json_value); + } else if (entry.second.is_object() && + container_includes(definition, KEYWORD_GRAPH) && + (container_includes(definition, KEYWORD_ID) || + container_includes(definition, KEYWORD_INDEX))) { + const bool by_id{container_includes(definition, KEYWORD_ID)}; + const bool property_valued{definition->index.has_value() && + definition->index.value() != KEYWORD_INDEX}; + std::optional index_property; + if (property_valued) { + index_property = + expand_iri(state, value_context, definition->index.value(), false, + true, nullptr, nullptr, empty_weak_pointer); + } + expanded_value = JSON::make_array(); + for (const auto &[graph_key, graph_value] : + sorted_entries(entry.second)) { + const JSON::String &index{*graph_key}; + const auto expanded_key{index == KEYWORD_NONE + ? 
std::optional{KEYWORD_NONE} + : expand_iri(state, value_context, index, + true, false, nullptr, nullptr, + empty_weak_pointer)}; + const bool none_key{expanded_key.has_value() && + expanded_key.value() == KEYWORD_NONE}; + auto graph_items{ + into_array(expand(state, value_context, property, *graph_value, + entry_pointer.concat(index)))}; + for (auto &item : graph_items.as_array()) { + // Wrap the item in a graph object, unless it is already one. + JSON graph{nullptr}; + if (item.is_object() && + item.defines(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH)) { + graph = std::move(item); + } else { + graph = JSON::make_object(); + graph.assign_assume_new(JSON::String{KEYWORD_GRAPH}, + into_array(std::move(item)), + KEYWORD_GRAPH_HASH); + } + if (!none_key) { + if (by_id) { + if (!graph.defines(KEYWORD_ID, KEYWORD_ID_HASH)) { + graph.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{expanded_key.value_or(index)}, + KEYWORD_ID_HASH); + } + } else if (property_valued) { + auto combined{into_array(expand_value( + state, value_context, definition->index, JSON{index}))}; + if (graph.defines(index_property.value())) { + for (auto &existing : + graph.at(index_property.value()).as_array()) { + combined.push_back(existing); + } + } + graph.assign(index_property.value(), std::move(combined)); + } else if (!graph.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + graph.assign_assume_new(JSON::String{KEYWORD_INDEX}, JSON{index}, + KEYWORD_INDEX_HASH); + } + } + expanded_value.push_back(std::move(graph)); + } + } + } else if (entry.second.is_object() && + container_includes(definition, KEYWORD_LANGUAGE)) { + expanded_value = JSON::make_array(); + for (const auto &[language_key, language_value] : + sorted_entries(entry.second)) { + const JSON::String &language{*language_key}; + const auto expanded_language{expand_iri(state, value_context, language, + false, true, nullptr, nullptr, + empty_weak_pointer)}; + const bool is_none{language == KEYWORD_NONE || + (expanded_language.has_value() && + 
expanded_language.value() == KEYWORD_NONE)}; + auto language_items{into_array(JSON{*language_value})}; + for (auto &item : language_items.as_array()) { + if (item.is_null()) { + continue; + } + if (!item.is_string()) { + throw JSONLDError("Invalid language map value", + entry_pointer.concat(language)); + } + auto value{JSON::make_object()}; + value.assign_assume_new(JSON::String{KEYWORD_VALUE}, JSON{item}, + KEYWORD_VALUE_HASH); + if (!is_none) { + value.assign_assume_new(JSON::String{KEYWORD_LANGUAGE}, + JSON{language}, KEYWORD_LANGUAGE_HASH); + } + const auto direction{definition->has_direction + ? definition->direction + : value_context.default_direction}; + if (direction.has_value()) { + value.assign_assume_new(JSON::String{KEYWORD_DIRECTION}, + JSON{direction.value()}, + KEYWORD_DIRECTION_HASH); + } + expanded_value.push_back(std::move(value)); + } + } + } else if (entry.second.is_object() && + container_includes(definition, KEYWORD_INDEX)) { + const bool property_valued{definition->index.has_value() && + definition->index.value() != KEYWORD_INDEX}; + std::optional index_property; + if (property_valued) { + index_property = + expand_iri(state, value_context, definition->index.value(), false, + true, nullptr, nullptr, empty_weak_pointer); + } + expanded_value = JSON::make_array(); + for (const auto &[index_key, index_value] : + sorted_entries(entry.second)) { + const JSON::String &index{*index_key}; + auto index_items{ + into_array(expand(state, value_context, property, *index_value, + entry_pointer.concat(index)))}; + for (auto &item : index_items.as_array()) { + if (index != KEYWORD_NONE) { + if (property_valued) { + if (item.is_object() && + item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + throw JSONLDError("Invalid value object", + entry_pointer.concat(index)); + } + // The index value is prepended to any existing values. 
+ auto combined{into_array(expand_value( + state, value_context, definition->index, JSON{index}))}; + if (item.defines(index_property.value())) { + for (auto &existing : + item.at(index_property.value()).as_array()) { + combined.push_back(existing); + } + } + item.assign(index_property.value(), std::move(combined)); + } else if (!item.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + item.assign_assume_new(JSON::String{KEYWORD_INDEX}, JSON{index}, + KEYWORD_INDEX_HASH); + } + } + expanded_value.push_back(item); + } + } + } else if (entry.second.is_object() && + (container_includes(definition, KEYWORD_ID) || + container_includes(definition, KEYWORD_TYPE))) { + const bool by_id{container_includes(definition, KEYWORD_ID)}; + expanded_value = JSON::make_array(); + for (const auto &[map_key, map_value] : sorted_entries(entry.second)) { + const JSON::String &index{*map_key}; + std::optional expanded_index; + if (index != KEYWORD_NONE) { + expanded_index = + expand_iri(state, value_context, index, by_id, !by_id, nullptr, + nullptr, empty_weak_pointer); + } + // The key may be an alias of @none, which carries no identifier. + if (expanded_index.has_value() && + expanded_index.value() == KEYWORD_NONE) { + expanded_index = std::nullopt; + } + // A type map key may carry a type-scoped context for its values. + // Type-scoped contexts do not propagate, so the values are resolved + // against the context that preceded the containing type-scoped + // context, with only this key's context layered on top. + const ActiveContext &base_context{value_context.previous && !by_id + ? *value_context.previous + : value_context}; + ActiveContext entry_context{base_context}; + if (!by_id) { + // Resolve the type term against the context the copy was made from, + // which outlives the copy being mutated below. 
+ const auto type_definition{base_context.terms.find(index)}; + if (type_definition != base_context.terms.cend() && + type_definition->second.context.has_value()) { + const auto saved_base{state.context_base_override}; + state.context_base_override = type_definition->second.context_base; + process_context(state, entry_context, + type_definition->second.context.value(), + entry_pointer.concat(index)); + state.context_base_override = saved_base; + entry_context.previous = nullptr; + } + } + // String values in a type map are node references. + auto entries{JSON::make_array()}; + auto raw_values{into_array(JSON{*map_value})}; + for (auto &raw : raw_values.as_array()) { + if (raw.is_string() && !by_id) { + auto reference{JSON::make_object()}; + const bool reference_vocab{definition->type_mapping.has_value() && + definition->type_mapping.value() == + KEYWORD_VOCAB}; + const auto &raw_string{raw.to_string()}; + const auto referenced{expand_iri(state, value_context, raw_string, + true, reference_vocab, nullptr, + nullptr, empty_weak_pointer)}; + reference.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{referenced.value_or(raw_string)}, + KEYWORD_ID_HASH); + entries.push_back(std::move(reference)); + } else { + auto expanded_items{ + into_array(expand(state, entry_context, property, raw, + entry_pointer.concat(index)))}; + for (auto &expanded : expanded_items.as_array()) { + entries.push_back(expanded); + } + } + } + for (auto &item : entries.as_array()) { + if (expanded_index.has_value()) { + if (by_id) { + if (!item.defines(KEYWORD_ID, KEYWORD_ID_HASH)) { + item.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{expanded_index.value()}, + KEYWORD_ID_HASH); + } + } else { + auto types{JSON::make_array()}; + types.push_back(JSON{expanded_index.value()}); + if (item.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + auto existing_types{into_array( + std::move(item.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)))}; + for (auto &existing : existing_types.as_array()) { + 
types.push_back(existing); + } + } + item.assign(KEYWORD_TYPE, std::move(types)); + } + } + expanded_value.push_back(item); + } + } + } else if (container_includes(definition, KEYWORD_GRAPH)) { + expanded_value = JSON::make_array(); + auto graph_items{into_array( + expand(state, value_context, property, entry.second, entry_pointer))}; + for (auto &item : graph_items.as_array()) { + auto graph{JSON::make_object()}; + graph.assign_assume_new(JSON::String{KEYWORD_GRAPH}, + into_array(std::move(item)), + KEYWORD_GRAPH_HASH); + expanded_value.push_back(std::move(graph)); + } + } else { + if (scoped && value_context.previous && entry.second.is_object() && + !entry.second.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + // A non-propagating property-scoped context applies to the immediate + // node, while nested nodes revert to the previous context. + expanded_value = expand_object(state, value_context, property, + entry.second, entry_pointer); + } else { + expanded_value = + expand(state, value_context, property, entry.second, entry_pointer); + } + } + + // A @list container wraps the expanded value, including a @json-coerced + // one. 
+ if (container_includes(definition, KEYWORD_LIST) && + !expanded_value.is_null() && + !(expanded_value.is_object() && + expanded_value.defines(KEYWORD_LIST, KEYWORD_LIST_HASH))) { + expanded_value = into_array(std::move(expanded_value)); + if (state.processing_1_0) { + for (const auto &item : expanded_value.as_array()) { + if (item.is_object() && + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + throw JSONLDError("List of lists", entry_pointer); + } + } + } + auto wrapper{JSON::make_object()}; + wrapper.assign_assume_new(JSON::String{KEYWORD_LIST}, + std::move(expanded_value), KEYWORD_LIST_HASH); + expanded_value = std::move(wrapper); + } + + if (expanded_value.is_null()) { + continue; + } + + if (definition != nullptr && definition->reverse) { + const auto reverse_items{into_array(JSON{expanded_value})}; + for (const auto &item : reverse_items.as_array()) { + if (item.is_object() && + (item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH))) { + throw JSONLDError("Invalid reverse property value", entry_pointer); + } + } + const auto *existing_reverse{ + result.try_at(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)}; + auto reverse_map{existing_reverse != nullptr ? *existing_reverse + : JSON::make_object()}; + merge(reverse_map, name, into_array(std::move(expanded_value))); + result.assign(KEYWORD_REVERSE, std::move(reverse_map)); + continue; + } + + merge(result, name, into_array(std::move(expanded_value))); + } + for (const auto &[nest_property, nest] : nests) { + // A @nest alias term may carry a property-scoped context for the nested + // entries. + const WeakPointer nest_pointer{source_pointer.concat(*nest_property)}; + const auto definition{active_context.terms.find(*nest_property)}; + if (definition != active_context.terms.cend() && + definition->second.context.has_value()) { + // Process the scoped context into a copy so the term that owns it is not + // freed while it is being read. 
+ ActiveContext nested{active_context}; + const auto saved_base{state.context_base_override}; + state.context_base_override = definition->second.context_base; + const auto saved_override{state.protected_override}; + state.protected_override = true; + process_context(state, nested, definition->second.context.value(), + nest_pointer); + state.protected_override = saved_override; + state.context_base_override = saved_base; + nested.previous = nullptr; + expand_entries(state, nested, type_context, result, active_property, + *nest, nest_pointer); + } else { + expand_entries(state, active_context, type_context, result, + active_property, *nest, nest_pointer); + } + } +} + +} // namespace + +// Expansion (JSON-LD 1.1 API Section 5.1.2) +auto expand(ExpansionState &state, ActiveContext &active_context, + const std::optional &active_property, + const JSON &element, const WeakPointer &pointer) -> JSON { + if (element.is_null()) { + return JSON{nullptr}; + } + + if (!element.is_object() && !element.is_array()) { + if (!active_property.has_value() || + active_property.value() == KEYWORD_GRAPH) { + return JSON{nullptr}; + } + return expand_value(state, active_context, active_property, element); + } + + if (element.is_array()) { + const TermDefinition *definition{nullptr}; + if (active_property.has_value()) { + const auto term{active_context.terms.find(active_property.value())}; + if (term != active_context.terms.cend()) { + definition = &term->second; + } + } + + auto result{JSON::make_array()}; + std::size_t item_index{0}; + for (const auto &item : element.as_array()) { + auto expanded{expand(state, active_context, active_property, item, + pointer.concat(item_index))}; + if (expanded.is_array()) { + for (auto &nested : expanded.as_array()) { + result.push_back(nested); + } + } else if (!expanded.is_null()) { + result.push_back(std::move(expanded)); + } + item_index += 1; + } + + if (container_includes(definition, KEYWORD_LIST)) { + if (state.processing_1_0) { + for (const auto 
&item : result.as_array()) { + if (item.is_object() && + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + throw JSONLDError("List of lists", pointer); + } + } + } + auto wrapper{JSON::make_object()}; + wrapper.assign_assume_new(JSON::String{KEYWORD_LIST}, std::move(result), + KEYWORD_LIST_HASH); + return wrapper; + } + + return result; + } + + // Revert a non-propagating (type-scoped) context when descending into a node + // object that is neither a value object nor an @id-only reference. + ActiveContext reverted; + ActiveContext *current{&active_context}; + if (active_context.previous) { + bool value_or_id{false}; + const bool single{element.object_size() == 1}; + for (const auto &entry : element.as_object()) { + const auto expanded{expand_iri(state, active_context, entry.first, false, + true, nullptr, nullptr, + empty_weak_pointer)}; + if (expanded.has_value() && + (expanded.value() == KEYWORD_VALUE || + (single && expanded.value() == KEYWORD_ID))) { + value_or_id = true; + break; + } + } + if (!value_or_id) { + reverted = *active_context.previous; + current = &reverted; + } + } + + if (element.defines(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH)) { + ActiveContext local{*current}; + process_context(state, local, + element.at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH), + pointer.concat(keyword_context())); + return expand_object(state, local, active_property, element, pointer); + } + + return expand_object(state, *current, active_property, element, pointer); +} + +} // namespace sourcemeta::core diff --git a/src/core/jsonld/jsonld_iri_expansion.cc b/src/core/jsonld/jsonld_iri_expansion.cc new file mode 100644 index 000000000..736485ad4 --- /dev/null +++ b/src/core/jsonld/jsonld_iri_expansion.cc @@ -0,0 +1,87 @@ +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include + +#include // std::optional + +namespace sourcemeta::core { + +// IRI Expansion (JSON-LD 1.1 API Section 5.2) +auto expand_iri(ExpansionState &state, ActiveContext &active_context, + const 
JSON::String &value, const bool document_relative, + const bool vocabulary, const JSON *const local_context, + DefinedTerms *const defined, const WeakPointer &context_pointer) + -> std::optional { + if (is_keyword(value)) { + return value; + } + + if (has_keyword_form(value)) { + return std::nullopt; + } + + if (local_context != nullptr && defined != nullptr && + local_context->is_object() && local_context->defines(value)) { + const auto iterator{defined->find(value)}; + if (iterator == defined->cend() || !iterator->second) { + create_term_definition(state, active_context, *local_context, value, + *defined, context_pointer); + } + } + + const auto term{active_context.terms.find(value)}; + if (term != active_context.terms.cend() && term->second.iri.has_value() && + is_keyword(term->second.iri.value())) { + return term->second.iri; + } + + if (vocabulary && term != active_context.terms.cend()) { + return term->second.iri; + } + + if (value.find(':', 1) != JSON::String::npos) { + // The term has the form of a compact IRI, so split at the first colon. + const auto colon{value.find(':')}; + const auto prefix{value.substr(0, colon)}; + const auto suffix{value.substr(colon + 1)}; + if (prefix == "_" || suffix.starts_with("//")) { + return value; + } + + if (local_context != nullptr && defined != nullptr && + local_context->is_object() && local_context->defines(prefix)) { + const auto iterator{defined->find(prefix)}; + if (iterator == defined->cend() || !iterator->second) { + create_term_definition(state, active_context, *local_context, prefix, + *defined, context_pointer); + } + } + + const auto definition{active_context.terms.find(prefix)}; + if (definition != active_context.terms.cend() && + definition->second.iri.has_value() && definition->second.prefix) { + return definition->second.iri.value() + suffix; + } + + // The value is only already an IRI when its prefix is a valid scheme. + // Otherwise it is resolved against the vocabulary or document base below. 
+ if (URI::is_scheme(prefix)) { + return value; + } + } + + if (vocabulary && active_context.vocabulary.has_value()) { + return active_context.vocabulary.value() + value; + } + + if (document_relative && active_context.base.has_value()) { + return URI::from_iri(value) + .resolve_from(URI::from_iri(active_context.base.value())) + .recompose(); + } + + return value; +} + +} // namespace sourcemeta::core diff --git a/src/core/jsonld/jsonld_keywords.h b/src/core/jsonld/jsonld_keywords.h new file mode 100644 index 000000000..ab16baea5 --- /dev/null +++ b/src/core/jsonld/jsonld_keywords.h @@ -0,0 +1,124 @@ +#ifndef SOURCEMETA_CORE_JSONLD_KEYWORDS_H_ +#define SOURCEMETA_CORE_JSONLD_KEYWORDS_H_ + +#include +#include + +#include // std::size_t + +namespace sourcemeta::core { + +// The JSON-LD 1.1 keywords (https://www.w3.org/TR/json-ld11/#keywords). +inline constexpr JSON::StringView KEYWORD_BASE{"@base"}; +inline constexpr JSON::StringView KEYWORD_CONTAINER{"@container"}; +inline constexpr JSON::StringView KEYWORD_CONTEXT{"@context"}; +inline constexpr JSON::StringView KEYWORD_DIRECTION{"@direction"}; +inline constexpr JSON::StringView KEYWORD_GRAPH{"@graph"}; +inline constexpr JSON::StringView KEYWORD_ID{"@id"}; +inline constexpr JSON::StringView KEYWORD_IMPORT{"@import"}; +inline constexpr JSON::StringView KEYWORD_INCLUDED{"@included"}; +inline constexpr JSON::StringView KEYWORD_INDEX{"@index"}; +inline constexpr JSON::StringView KEYWORD_JSON{"@json"}; +inline constexpr JSON::StringView KEYWORD_LANGUAGE{"@language"}; +inline constexpr JSON::StringView KEYWORD_LIST{"@list"}; +inline constexpr JSON::StringView KEYWORD_NEST{"@nest"}; +inline constexpr JSON::StringView KEYWORD_NONE{"@none"}; +inline constexpr JSON::StringView KEYWORD_PREFIX{"@prefix"}; +inline constexpr JSON::StringView KEYWORD_PROPAGATE{"@propagate"}; +inline constexpr JSON::StringView KEYWORD_PROTECTED{"@protected"}; +inline constexpr JSON::StringView KEYWORD_REVERSE{"@reverse"}; +inline constexpr 
JSON::StringView KEYWORD_SET{"@set"}; +inline constexpr JSON::StringView KEYWORD_TYPE{"@type"}; +inline constexpr JSON::StringView KEYWORD_VALUE{"@value"}; +inline constexpr JSON::StringView KEYWORD_VERSION{"@version"}; +inline constexpr JSON::StringView KEYWORD_VOCAB{"@vocab"}; + +// Precomputed object-key hashes for each keyword, so that object access never +// has to recompute them. +inline const auto KEYWORD_BASE_HASH{JSON::Object::hash(KEYWORD_BASE)}; +inline const auto KEYWORD_CONTAINER_HASH{JSON::Object::hash(KEYWORD_CONTAINER)}; +inline const auto KEYWORD_CONTEXT_HASH{JSON::Object::hash(KEYWORD_CONTEXT)}; +inline const auto KEYWORD_DIRECTION_HASH{JSON::Object::hash(KEYWORD_DIRECTION)}; +inline const auto KEYWORD_GRAPH_HASH{JSON::Object::hash(KEYWORD_GRAPH)}; +inline const auto KEYWORD_ID_HASH{JSON::Object::hash(KEYWORD_ID)}; +inline const auto KEYWORD_IMPORT_HASH{JSON::Object::hash(KEYWORD_IMPORT)}; +inline const auto KEYWORD_INCLUDED_HASH{JSON::Object::hash(KEYWORD_INCLUDED)}; +inline const auto KEYWORD_INDEX_HASH{JSON::Object::hash(KEYWORD_INDEX)}; +inline const auto KEYWORD_JSON_HASH{JSON::Object::hash(KEYWORD_JSON)}; +inline const auto KEYWORD_LANGUAGE_HASH{JSON::Object::hash(KEYWORD_LANGUAGE)}; +inline const auto KEYWORD_LIST_HASH{JSON::Object::hash(KEYWORD_LIST)}; +inline const auto KEYWORD_NEST_HASH{JSON::Object::hash(KEYWORD_NEST)}; +inline const auto KEYWORD_NONE_HASH{JSON::Object::hash(KEYWORD_NONE)}; +inline const auto KEYWORD_PREFIX_HASH{JSON::Object::hash(KEYWORD_PREFIX)}; +inline const auto KEYWORD_PROPAGATE_HASH{JSON::Object::hash(KEYWORD_PROPAGATE)}; +inline const auto KEYWORD_PROTECTED_HASH{JSON::Object::hash(KEYWORD_PROTECTED)}; +inline const auto KEYWORD_REVERSE_HASH{JSON::Object::hash(KEYWORD_REVERSE)}; +inline const auto KEYWORD_SET_HASH{JSON::Object::hash(KEYWORD_SET)}; +inline const auto KEYWORD_TYPE_HASH{JSON::Object::hash(KEYWORD_TYPE)}; +inline const auto KEYWORD_VALUE_HASH{JSON::Object::hash(KEYWORD_VALUE)}; +inline const auto 
KEYWORD_VERSION_HASH{JSON::Object::hash(KEYWORD_VERSION)}; +inline const auto KEYWORD_VOCAB_HASH{JSON::Object::hash(KEYWORD_VOCAB)}; + +// A stable owned copy of the @context keyword, suitable as a JSON Pointer +// token. (A namespace-scope JSON::String constant would trip clang-tidy's +// throwing-static-initialization check, hence the function-local static.) +inline auto keyword_context() -> const JSON::String & { + static const JSON::String value{KEYWORD_CONTEXT}; + return value; +} + +inline auto is_keyword(const JSON::StringView value) -> bool { + if (value.size() < 2 || value.front() != '@') { + return false; + } + return value == KEYWORD_BASE || value == KEYWORD_CONTAINER || + value == KEYWORD_CONTEXT || value == KEYWORD_DIRECTION || + value == KEYWORD_GRAPH || value == KEYWORD_ID || + value == KEYWORD_IMPORT || value == KEYWORD_INCLUDED || + value == KEYWORD_INDEX || value == KEYWORD_JSON || + value == KEYWORD_LANGUAGE || value == KEYWORD_LIST || + value == KEYWORD_NEST || value == KEYWORD_NONE || + value == KEYWORD_PREFIX || value == KEYWORD_PROPAGATE || + value == KEYWORD_PROTECTED || value == KEYWORD_REVERSE || + value == KEYWORD_SET || value == KEYWORD_TYPE || + value == KEYWORD_VALUE || value == KEYWORD_VERSION || + value == KEYWORD_VOCAB; +} + +inline auto is_keyword(const JSON::StringView value, + const JSON::Object::hash_type hash) -> bool { + if (value.size() < 2 || value.front() != '@') { + return false; + } + return hash == KEYWORD_BASE_HASH || hash == KEYWORD_CONTAINER_HASH || + hash == KEYWORD_CONTEXT_HASH || hash == KEYWORD_DIRECTION_HASH || + hash == KEYWORD_GRAPH_HASH || hash == KEYWORD_ID_HASH || + hash == KEYWORD_IMPORT_HASH || hash == KEYWORD_INCLUDED_HASH || + hash == KEYWORD_INDEX_HASH || hash == KEYWORD_JSON_HASH || + hash == KEYWORD_LANGUAGE_HASH || hash == KEYWORD_LIST_HASH || + hash == KEYWORD_NEST_HASH || hash == KEYWORD_NONE_HASH || + hash == KEYWORD_PREFIX_HASH || hash == KEYWORD_PROPAGATE_HASH || + hash == KEYWORD_PROTECTED_HASH 
|| hash == KEYWORD_REVERSE_HASH || + hash == KEYWORD_SET_HASH || hash == KEYWORD_TYPE_HASH || + hash == KEYWORD_VALUE_HASH || hash == KEYWORD_VERSION_HASH || + hash == KEYWORD_VOCAB_HASH; +} + +// Whether the given value has the generic form of a keyword (an `@` followed by +// one or more letters), which the algorithms treat as a reserved token even +// when it is not a defined keyword. +inline auto has_keyword_form(const JSON::StringView value) -> bool { + if (value.size() < 2 || value.front() != '@') { + return false; + } + for (std::size_t index{1}; index < value.size(); index += 1) { + if (!is_alpha(value[index])) { + return false; + } + } + return true; +} + +} // namespace sourcemeta::core + +#endif diff --git a/src/core/jsonld/jsonld_value_expansion.cc b/src/core/jsonld/jsonld_value_expansion.cc new file mode 100644 index 000000000..ed5f3dfcd --- /dev/null +++ b/src/core/jsonld/jsonld_value_expansion.cc @@ -0,0 +1,77 @@ +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include // std::optional + +namespace sourcemeta::core { + +// Value Expansion (JSON-LD 1.1 API Section 5.3.3) +auto expand_value(ExpansionState &state, ActiveContext &active_context, + const std::optional &active_property, + const JSON &value) -> JSON { + const TermDefinition *definition{nullptr}; + if (active_property.has_value()) { + const auto iterator{active_context.terms.find(active_property.value())}; + if (iterator != active_context.terms.cend()) { + definition = &iterator->second; + } + } + + if (definition != nullptr && definition->type_mapping.has_value() && + value.is_string()) { + if (definition->type_mapping.value() == KEYWORD_ID) { + auto result{JSON::make_object()}; + const auto identifier{expand_iri(state, active_context, value.to_string(), + true, false, nullptr, nullptr, + empty_weak_pointer)}; + result.assign_assume_new(JSON::String{KEYWORD_ID}, + identifier.has_value() ? 
JSON{identifier.value()} + : JSON{nullptr}, + KEYWORD_ID_HASH); + return result; + } + if (definition->type_mapping.value() == KEYWORD_VOCAB) { + auto result{JSON::make_object()}; + const auto identifier{expand_iri(state, active_context, value.to_string(), + true, true, nullptr, nullptr, + empty_weak_pointer)}; + result.assign_assume_new(JSON::String{KEYWORD_ID}, + identifier.has_value() ? JSON{identifier.value()} + : JSON{nullptr}, + KEYWORD_ID_HASH); + return result; + } + } + + auto result{JSON::make_object()}; + result.assign_assume_new(JSON::String{KEYWORD_VALUE}, JSON{value}, + KEYWORD_VALUE_HASH); + + if (definition != nullptr && definition->type_mapping.has_value() && + definition->type_mapping.value() != KEYWORD_ID && + definition->type_mapping.value() != KEYWORD_VOCAB && + definition->type_mapping.value() != KEYWORD_NONE) { + result.assign_assume_new(JSON::String{KEYWORD_TYPE}, + JSON{definition->type_mapping.value()}, + KEYWORD_TYPE_HASH); + } else if (value.is_string()) { + const auto language{definition != nullptr && definition->has_language + ? definition->language + : active_context.default_language}; + if (language.has_value()) { + result.assign_assume_new(JSON::String{KEYWORD_LANGUAGE}, + JSON{language.value()}, KEYWORD_LANGUAGE_HASH); + } + const auto direction{definition != nullptr && definition->has_direction + ? 
definition->direction + : active_context.default_direction}; + if (direction.has_value()) { + result.assign_assume_new(JSON::String{KEYWORD_DIRECTION}, + JSON{direction.value()}, KEYWORD_DIRECTION_HASH); + } + } + + return result; +} + +} // namespace sourcemeta::core diff --git a/test/jsonld/CMakeLists.txt b/test/jsonld/CMakeLists.txt new file mode 100644 index 000000000..afb7c49d3 --- /dev/null +++ b/test/jsonld/CMakeLists.txt @@ -0,0 +1,22 @@ +sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME jsonld + SOURCES jsonld_expand_test.cc jsonld_expand_error_test.cc) + +target_link_libraries(sourcemeta_core_jsonld_unit + PRIVATE sourcemeta::core::jsonld) +target_link_libraries(sourcemeta_core_jsonld_unit + PRIVATE sourcemeta::core::json) +target_link_libraries(sourcemeta_core_jsonld_unit + PRIVATE sourcemeta::core::jsonpointer) + +# W3C JSON-LD Test Suite +# See https://github.com/w3c/json-ld-api +sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME jsonld_suite + SOURCES jsonld_suite.cc) +target_compile_definitions(sourcemeta_core_jsonld_suite_unit + PRIVATE JSONLD_SUITE_PATH="${PROJECT_SOURCE_DIR}/vendor/w3c-json-ld/tests") +target_link_libraries(sourcemeta_core_jsonld_suite_unit + PRIVATE sourcemeta::core::jsonld) +target_link_libraries(sourcemeta_core_jsonld_suite_unit + PRIVATE sourcemeta::core::json) +target_link_libraries(sourcemeta_core_jsonld_suite_unit + PRIVATE sourcemeta::core::text) diff --git a/test/jsonld/jsonld_expand_error_test.cc b/test/jsonld/jsonld_expand_error_test.cc new file mode 100644 index 000000000..db002f007 --- /dev/null +++ b/test/jsonld/jsonld_expand_error_test.cc @@ -0,0 +1,621 @@ +#include /* NOTE(review): the header names after each #include, and template arguments such as the one for std::optional further down, appear to be missing from this copy of the patch; restore them from the original before applying. */ + +#include +#include +#include + +#include // std::optional, std::nullopt +#include // std::string +/* Evaluates expression and expects it to throw sourcemeta::core::JSONLDError whose what() equals expected_code and whose pointer(), rendered through to_string, equals expected_pointer. Completing without a throw, or throwing any other type, is reported with FAIL(). */ +#define EXPECT_JSONLD_EXPAND_ERROR(expression, expected_code, \ + expected_pointer) \ + try { \ + [[maybe_unused]] const auto result{expression}; \ + FAIL() << "Expected JSON-LD error: " << (expected_code); \ + } catch (const
sourcemeta::core::JSONLDError &error) { \ + EXPECT_STREQ(error.what(), (expected_code)); \ + EXPECT_EQ(sourcemeta::core::to_string(error.pointer()), \ + (expected_pointer)); \ + } catch (...) { \ + FAIL() << "Expected a JSONLDError: " << (expected_code); \ + } + +namespace { + +auto remote_resolver() -> sourcemeta::core::JSONLDResolver { /* Resolver stub for the remote-context tests: the recursive identifier yields a context that points back at the same identifier, the no-context identifier yields a document without an @context entry, and every other identifier yields std::nullopt. */ + return [](const sourcemeta::core::JSON::StringView identifier) + -> std::optional { + if (identifier == "https://example.com/recursive") { + return sourcemeta::core::parse_json( + R"({ "@context": "https://example.com/recursive" })"); + } + if (identifier == "https://example.com/no-context") { + return sourcemeta::core::parse_json(R"({ "foo": "bar" })"); + } + return std::nullopt; + }; +} + +} // namespace + +TEST(JSONLD_expand_error, cyclic_iri_mapping) { /* The @id of term is the compact form term:term, which uses the term being defined as its own prefix. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "term": { "@id": "term:term" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Cyclic IRI mapping", "/@context/term"); +} + +TEST(JSONLD_expand_error, invalid_term_definition_empty) { /* The term name is the empty string; the expected pointer is the enclosing @context. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "": "http://example.com/" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid term definition", "/@context"); +} + +TEST(JSONLD_expand_error, keyword_redefinition) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "@type": "http://example.com/" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Keyword redefinition", "/@context/@type"); +} + +TEST(JSONLD_expand_error, protected_term_redefinition) { /* The first context sets @protected and defines a; the second context maps a to a different IRI. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": [ + { "@protected": true, "a": "http://example.com/a" }, + { "a": "http://example.com/b" } + ] + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Protected term redefinition", "/@context/1/a"); +} + +TEST(JSONLD_expand_error,
invalid_protected_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@protected": "yes" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @protected value", + "/@context/a/@protected"); +} + +TEST(JSONLD_expand_error, invalid_iri_mapping) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": true } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid IRI mapping", "/@context/a/@id"); +} + +TEST(JSONLD_expand_error, invalid_keyword_alias) { /* The @id of the term names the keyword @context. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "@context" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid keyword alias", "/@context/a/@id"); +} + +TEST(JSONLD_expand_error, invalid_reverse_property) { /* @reverse and @id appear together in one term definition. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "a": { "@reverse": "http://example.com/a", "@id": "http://example.com/b" } + } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid reverse property", + "/@context/a/@reverse"); +} + +TEST(JSONLD_expand_error, invalid_type_mapping) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@type": true } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid type mapping", "/@context/a/@type"); +} + +TEST(JSONLD_expand_error, invalid_container_mapping) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@container": "@unknown" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid container mapping", + "/@context/a/@container"); +} + +TEST(JSONLD_expand_error, invalid_language_mapping) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id":
"http://example.com/a", "@language": true } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid language mapping", + "/@context/a/@language"); +} + +TEST(JSONLD_expand_error, invalid_prefix_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@prefix": "yes" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @prefix value", "/@context/a/@prefix"); +} + +TEST(JSONLD_expand_error, invalid_nest_value_term) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@nest": "@id" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @nest value", "/@context/a/@nest"); +} + +TEST(JSONLD_expand_error, invalid_scoped_context) { /* The nested @context holds a non-string @id; the expected code is Invalid scoped context and the expected pointer stops at the term-level @context. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "a": { + "@id": "http://example.com/a", + "@context": { "b": { "@id": true } } + } + } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid scoped context", "/@context/a/@context"); +} + +TEST(JSONLD_expand_error, invalid_local_context) { + const auto input = sourcemeta::core::parse_json(R"({ "@context": true })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid local context", "/@context"); +} + +TEST(JSONLD_expand_error, invalid_version_value) { + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@version": 2.0 } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @version value", "/@context/@version"); +} + +TEST(JSONLD_expand_error, invalid_propagate_value) { + const auto input = sourcemeta::core::parse_json( + R"({ "@context": { "@propagate": "yes" } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @propagate value", + "/@context/@propagate"); +} +
+TEST(JSONLD_expand_error, invalid_import_value) { + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@import": true } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @import value", "/@context/@import"); +} + +TEST(JSONLD_expand_error, invalid_base_iri) { + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@base": true } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid base IRI", "/@context/@base"); +} + +TEST(JSONLD_expand_error, invalid_vocab_mapping) { + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@vocab": true } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid vocab mapping", "/@context/@vocab"); +} + +TEST(JSONLD_expand_error, invalid_default_language) { + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@language": true } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid default language", "/@context/@language"); +} + +TEST(JSONLD_expand_error, invalid_base_direction) { + const auto input = sourcemeta::core::parse_json( + R"({ "@context": { "@direction": "sideways" } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid base direction", "/@context/@direction"); +} + +TEST(JSONLD_expand_error, processing_mode_conflict) { /* The context declares @version 1.1 while the caller passes JSONLDVersion::V1_0. */ + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@version": 1.1 } })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + sourcemeta::core::JSONLDVersion::V1_0), + "Processing mode conflict", "/@context/@version"); +} + +TEST(JSONLD_expand_error, invalid_context_entry) { /* A context-level @protected entry with JSONLDVersion::V1_0 requested. */ + const auto input = + sourcemeta::core::parse_json(R"({ "@context": { "@protected": true } })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + sourcemeta::core::JSONLDVersion::V1_0), + "Invalid context entry",
"/@context"); +} + +TEST(JSONLD_expand_error, loading_remote_context_failed) { /* remote_resolver() yields std::nullopt for this identifier. */ + const auto input = sourcemeta::core::parse_json( + R"({ "@context": "https://example.com/missing" })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", remote_resolver()), + "Loading remote context failed", "/@context"); +} + +TEST(JSONLD_expand_error, invalid_remote_context) { /* remote_resolver() serves a document that has no @context entry. */ + const auto input = sourcemeta::core::parse_json( + R"({ "@context": "https://example.com/no-context" })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", remote_resolver()), + "Invalid remote context", "/@context"); +} + +TEST(JSONLD_expand_error, recursive_context_inclusion) { /* remote_resolver() serves a context that references its own identifier. */ + const auto input = sourcemeta::core::parse_json( + R"({ "@context": "https://example.com/recursive" })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", remote_resolver()), + "Recursive context inclusion", "/@context"); +} + +TEST(JSONLD_expand_error, colliding_keywords) { /* id is declared as an alias of @id, and the node uses both id and @id. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "id": "@id" }, + "@id": "http://example.com/a", + "id": "http://example.com/b" + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Colliding keywords", "/id"); +} + +TEST(JSONLD_expand_error, invalid_id_value) { + const auto input = sourcemeta::core::parse_json(R"({ "@id": true })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @id value", "/@id"); +} + +TEST(JSONLD_expand_error, invalid_type_value) { + const auto input = sourcemeta::core::parse_json(R"({ "@type": true })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid type value", "/@type"); +} + +TEST(JSONLD_expand_error, invalid_value_object) { /* One value object carries both @type and @language. The expected pointer writes each slash of the property IRI as ~1. */ + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { + "@value": "x", "@type": "http://example.com/t", "@language": "en" + } + })"); + +
EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid value object", + "/http:~1~1example.com~1p"); +} + +TEST(JSONLD_expand_error, invalid_language_tagged_string) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@value": "x", "@language": true } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid language-tagged string", + "/http:~1~1example.com~1p/@language"); +} + +TEST(JSONLD_expand_error, invalid_language_tagged_value) { /* @language is a string here, but @value is a number. */ + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@value": 1, "@language": "en" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid language-tagged value", + "/http:~1~1example.com~1p"); +} + +TEST(JSONLD_expand_error, invalid_typed_value) { /* The @type of the value object is the blank node identifier _:b. */ + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@value": "x", "@type": "_:b" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid typed value", "/http:~1~1example.com~1p"); +} + +TEST(JSONLD_expand_error, invalid_value_object_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@value": { "a": 1 } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid value object value", + "/http:~1~1example.com~1p"); +} + +TEST(JSONLD_expand_error, invalid_set_or_list_object) { /* A list object that also carries @id. */ + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@list": [ "a" ], "@id": "http://example.com/x" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid set or list object", + "/http:~1~1example.com~1p"); +} + +TEST(JSONLD_expand_error, invalid_index_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@index": true, "@value": "x" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid
@index value", + "/http:~1~1example.com~1p/@index"); +} + +TEST(JSONLD_expand_error, invalid_reverse_value) { + const auto input = sourcemeta::core::parse_json(R"({ "@reverse": "foo" })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @reverse value", "/@reverse"); +} + +TEST(JSONLD_expand_error, invalid_reverse_property_value) { /* The value under the reverse property is a value object. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@reverse": { "http://example.com/p": { "@value": "x" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid reverse property value", "/@reverse"); +} + +TEST(JSONLD_expand_error, invalid_included_value) { + const auto input = + sourcemeta::core::parse_json(R"({ "@included": { "@value": "x" } })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @included value", "/@included"); +} + +TEST(JSONLD_expand_error, invalid_nest_value_expansion) { /* nest is an alias of @nest and its value is a value object. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "nest": "@nest" }, + "nest": { "@value": "x" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid @nest value", "/nest"); +} + +TEST(JSONLD_expand_error, list_of_lists) { /* Runs with JSONLDVersion::V1_0 requested explicitly. */ + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/p": { "@list": [ { "@list": [ "a" ] } ] } + })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + sourcemeta::core::JSONLDVersion::V1_0), + "List of lists", "/http:~1~1example.com~1p/@list"); +} + +TEST(JSONLD_expand_error, invalid_base_direction_in_value_object) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "p": "http://example.com/p" }, + "p": { "@value": "v", "@direction": "up" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid base direction", "/p/@direction"); +} + +TEST(JSONLD_expand_error, keyword_alias_dropped_inside_reverse_map) { /* none is an alias of @none and is used as a key inside @reverse. */ + const auto input =
sourcemeta::core::parse_json(R"({ + "@context": { "none": "@none" }, + "@reverse": { "none": "x" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid reverse property map", "/@reverse/none"); +} + +TEST(JSONLD_expand_error, colliding_type_in_json_ld_1_0) { /* type is an alias of @type and both keys are present; JSONLDVersion::V1_0 is requested. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "type": "@type" }, + "@type": "http://example.com/A", + "type": "http://example.com/B" + })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + sourcemeta::core::JSONLDVersion::V1_0), + "Colliding keywords", "/type"); +} + +TEST(JSONLD_expand_error, invalid_language_tagged_string_without_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "p": "http://example.com/p" }, + "p": { "@language": 42 } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid language-tagged string", "/p/@language"); +} + +TEST(JSONLD_expand_error, protected_in_term_definition_in_1_0) { /* A term-level @protected entry with JSONLDVersion::V1_0 requested. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@protected": true } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + sourcemeta::core::JSONLDVersion::V1_0), + "Invalid term definition", "/@context/a/@protected"); +} + +TEST(JSONLD_expand_error, nest_in_term_definition_in_1_0) { /* A term-level @nest entry with JSONLDVersion::V1_0 requested. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@nest": "@nest" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + sourcemeta::core::JSONLDVersion::V1_0), + "Invalid term definition", "/@context/a/@nest"); +} + +TEST(JSONLD_expand_error, prefix_on_compact_iri_term) { /* The term name ex:foo contains a colon and its definition also sets @prefix. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "ex": "http://example.com/", + "ex:foo": { "@id": "http://example.com/foo", "@prefix": true } + } + })"); + +
EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid term definition", + "/@context/ex:foo/@prefix"); +} + +TEST(JSONLD_expand_error, invalid_base_direction_in_term_definition) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@direction": "up" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid base direction", + "/@context/a/@direction"); +} + +TEST(JSONLD_expand_error, invalid_container_array_combination) { /* The @container array pairs @id with @language. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "a": { "@id": "http://example.com/a", "@container": [ "@id", "@language" ] } + } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid container mapping", + "/@context/a/@container"); +} + +TEST(JSONLD_expand_error, unknown_entry_in_term_definition) { /* The expected pointer is the term itself rather than the unknown @bogus entry. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "a": { "@id": "http://example.com/a", "@bogus": true } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid term definition", "/@context/a"); +} + +TEST(JSONLD_expand_error, type_keyword_container_id) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "@type": { "@container": "@id" } } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Keyword redefinition", "/@context/@type"); +} + +TEST(JSONLD_expand_error, invalid_version_precedes_mode_conflict) { /* @version is 1.5 and JSONLDVersion::V1_0 is requested; the expected code is Invalid @version value rather than Processing mode conflict. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "@version": 1.5 } + })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", {}, + sourcemeta::core::JSONLDVersion::V1_0), + "Invalid @version value", "/@context/@version"); +} + +TEST(JSONLD_expand_error, relative_base_without_base) { /* The first context sets @base to null; the second then supplies a relative @base. No base IRI argument is passed. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": [ { "@base": null }, { "@base": "relative/path" } ] + })"); + +
EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid base IRI", "/@context/1/@base"); +} + +TEST(JSONLD_expand_error, protected_null_term_redefinition) { /* The protected definition maps term to a null @id; redefining it in the second context is still expected to fail. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": [ + { "term": { "@id": null, "@protected": true } }, + { "term": "http://example.com/x" } + ] + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Protected term redefinition", "/@context/1/term"); +} + +TEST(JSONLD_expand_error, free_floating_invalid_set_or_list_object) { /* The offending object is the document root, hence the empty expected pointer. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@list": [ "foo" ], "@id": "http://example.com/bar" + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid set or list object", ""); +} + +TEST(JSONLD_expand_error, import_loading_failed) { /* remote_resolver() yields std::nullopt for the @import target; the expected pointer is the @import entry. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "@import": "https://example.com/unknown" } + })"); + + EXPECT_JSONLD_EXPAND_ERROR( + sourcemeta::core::jsonld_expand(input, "", remote_resolver()), + "Loading remote context failed", "/@context/@import"); +} + +TEST(JSONLD_expand_error, duplicate_container_keyword) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "a": { "@id": "http://example.com/a", "@container": [ "@set", "@set" ] } + } + })"); + + EXPECT_JSONLD_EXPAND_ERROR(sourcemeta::core::jsonld_expand(input), + "Invalid container mapping", + "/@context/a/@container"); +} + +TEST(JSONLD_expand_error, error_code_value_is_owned) { /* Builds the error from code.c_str(), then overwrites code with an empty string, and checks that what() still returns the original text. */ + std::string code{"A custom error code longer than small string optimization"}; + const sourcemeta::core::JSONLDError error{code.c_str(), + sourcemeta::core::Pointer{}}; + code = std::string{}; + EXPECT_STREQ(error.what(), + "A custom error code longer than small string optimization"); +} diff --git a/test/jsonld/jsonld_expand_test.cc b/test/jsonld/jsonld_expand_test.cc new file mode 100644 index 000000000..8c94913c8 --- /dev/null +++ b/test/jsonld/jsonld_expand_test.cc
@@ -0,0 +1,390 @@ +#include /* NOTE(review): the header names after each #include appear to be missing from this copy of the patch; restore them from the original before applying. */ + +#include +#include + +#include // std::optional, std::nullopt + +TEST(JSONLD_expand, empty_object) { /* An empty object is expected to expand to an empty array. */ + const auto input = sourcemeta::core::parse_json("{}"); + const auto expected = sourcemeta::core::parse_json("[]"); + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, absolute_iri_property_with_string_value) { /* The string value is expected to be wrapped as a value object inside an array. */ + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/foo": "bar" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/foo": [ { "@value": "bar" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, node_with_id_and_property) { + const auto input = sourcemeta::core::parse_json(R"({ + "@id": "http://example.com/a", + "http://example.com/foo": "bar" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "@id": "http://example.com/a", + "http://example.com/foo": [ { "@value": "bar" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, type_is_made_an_array) { /* A single @type string is expected to come back as a one-element array. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@id": "http://example.com/a", + "@type": "http://example.com/T", + "http://example.com/foo": "bar" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "@id": "http://example.com/a", + "@type": [ "http://example.com/T" ], + "http://example.com/foo": [ { "@value": "bar" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, multiple_values_preserve_order) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/foo": [ "a", "b" ] + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/foo": [ { "@value": "a" }, { "@value": "b" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, numeric_value) { + const auto input
= sourcemeta::core::parse_json(R"({ + "http://example.com/foo": 1 + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/foo": [ { "@value": 1 } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, boolean_value) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/foo": true + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/foo": [ { "@value": true } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, undefined_term_without_context_is_dropped) { /* foo has no term definition and the input has no @context; the expected output is an empty array. */ + const auto input = sourcemeta::core::parse_json(R"({ + "foo": "bar" + })"); + + const auto expected = sourcemeta::core::parse_json("[]"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, term_maps_to_iri) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "name": "http://example.com/name" }, + "name": "John" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/name": [ { "@value": "John" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, vocabulary_mapping) { /* name has no term definition of its own; the expected IRI is the @vocab value followed by name. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "@vocab": "http://example.com/" }, + "name": "John" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/name": [ { "@value": "John" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, compact_iri_via_prefix) { /* ex:name is expected to expand to the IRI mapped to ex followed by name. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "ex": "http://example.com/" }, + "ex:name": "John" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/name": [ { "@value": "John" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} +
+TEST(JSONLD_expand, type_coercion_to_id) { /* The term sets @type to @id, so the string value is expected to become a node reference instead of a value object. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "knows": { "@id": "http://example.com/knows", "@type": "@id" } + }, + "knows": "http://example.com/jane" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/knows": [ { "@id": "http://example.com/jane" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, type_coercion_to_datatype) { + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "born": { + "@id": "http://example.com/born", + "@type": "http://www.w3.org/2001/XMLSchema#date" + } + }, + "born": "1990-01-01" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/born": [ + { + "@value": "1990-01-01", + "@type": "http://www.w3.org/2001/XMLSchema#date" + } + ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, default_language) { /* The context-level @language is expected to be attached to the plain string value. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "@language": "en", "name": "http://example.com/name" }, + "name": "John" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/name": [ { "@value": "John", "@language": "en" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, list_keyword) { + const auto input = sourcemeta::core::parse_json(R"({ + "http://example.com/foo": { "@list": [ "a", "b" ] } + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/foo": [ + { "@list": [ { "@value": "a" }, { "@value": "b" } ] } + ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, direction_dropped_in_json_ld_1_0) { /* With JSONLDVersion::V1_0 requested, the @direction entry is expected to be absent from the output. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "p": "http://example.com/p" }, + "p": { "@value": "v", "@direction": "rtl" } + })"); + + const
auto expected = sourcemeta::core::parse_json(R"([ + { "http://example.com/p": [ { "@value": "v" } ] } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand( + input, "", {}, sourcemeta::core::JSONLDVersion::V1_0), + expected); +} + +TEST(JSONLD_expand, included_dropped_in_json_ld_1_0) { /* With JSONLDVersion::V1_0 requested, the @included entry is expected to be absent from the output. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "p": "http://example.com/p" }, + "p": "v", + "@included": { "@id": "http://example.com/other" } + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { "http://example.com/p": [ { "@value": "v" } ] } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand( + input, "", {}, sourcemeta::core::JSONLDVersion::V1_0), + expected); +} + +TEST(JSONLD_expand, nest_term_whose_scoped_context_redefines_itself) { /* nest aliases @nest and its scoped context remaps nest to an IRI; the nested property is still expected to be hoisted into the parent node. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "nest": { + "@id": "@nest", + "@context": { "nest": { "@id": "http://example.com/nest" } } + } + }, + "nest": { "http://example.com/foo": "bar" } + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { "http://example.com/foo": [ { "@value": "bar" } ] } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, json_typed_value_in_list_container) { /* A scalar under a term with @type @json and @container @list is expected to become a one-element list holding a JSON-typed value object. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "e": { + "@id": "http://example.com/e", + "@type": "@json", + "@container": "@list" + } + }, + "e": 42 + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "http://example.com/e": [ + { "@list": [ { "@value": 42, "@type": "@json" } ] } + ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, id_typed_keyword_form_value_expands_to_null) { /* The value @foo has the shape of a keyword; the expected output keeps the node reference with a null @id. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { "p": { "@id": "http://example.com/p", "@type": "@id" } }, + "p": "@foo" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { "http://example.com/p": [ { "@id": null } ] }
+ ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, non_expandable_type_value_is_omitted) { /* The @type value @foo is expected to leave no @type entry in the output. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@id": "http://example.com/n", + "@type": "@foo", + "http://example.com/p": "v" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "@id": "http://example.com/n", + "http://example.com/p": [ { "@value": "v" } ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, graph_value_expanding_to_null_yields_no_element) { + const auto input = sourcemeta::core::parse_json(R"({ "@graph": "scalar" })"); + const auto expected = sourcemeta::core::parse_json("[]"); + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} + +TEST(JSONLD_expand, base_in_remote_context_is_ignored) { /* The resolver serves a context that sets @base, yet the expected @id is resolved against the base IRI argument passed to jsonld_expand. */ + const sourcemeta::core::JSONLDResolver resolver = + [](const sourcemeta::core::JSON::StringView identifier) + -> std::optional { + if (identifier == "https://example.com/remote-base") { + return sourcemeta::core::parse_json( + R"({ "@context": { "@base": "http://remote.example/" } })"); + } + return std::nullopt; + }; + + const auto input = sourcemeta::core::parse_json(R"({ + "@context": "https://example.com/remote-base", + "@id": "relative-node", + "http://example.com/p": "v" + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { + "@id": "http://doc.example/relative-node", + "http://example.com/p": [ { "@value": "v" } ] + } + ])"); + + EXPECT_EQ( + sourcemeta::core::jsonld_expand(input, "http://doc.example/", resolver), + expected); +} + +TEST(JSONLD_expand, language_map_direction_uses_property_scoped_context) { /* @direction is set only in the scoped context of p, and the expected language-map entry carries it. */ + const auto input = sourcemeta::core::parse_json(R"({ + "@context": { + "p": { + "@id": "http://example.com/p", + "@container": "@language", + "@context": { "@direction": "rtl" } + } + }, + "p": { "en": "hello" } + })"); + + const auto expected = sourcemeta::core::parse_json(R"([ + { +
"http://example.com/p": [ + { "@value": "hello", "@language": "en", "@direction": "rtl" } + ] + } + ])"); + + EXPECT_EQ(sourcemeta::core::jsonld_expand(input), expected); +} diff --git a/test/jsonld/jsonld_suite.cc b/test/jsonld/jsonld_suite.cc new file mode 100644 index 000000000..f6ed35f83 --- /dev/null +++ b/test/jsonld/jsonld_suite.cc @@ -0,0 +1,172 @@ +#include /* NOTE(review): the header names after each #include, the template arguments of std::optional and the target types of static_cast appear to be missing from this copy of the patch; restore them from the original before applying. */ + +#include +#include +#include + +#include // std::filesystem +#include // std::optional +#include // std::string +#include // std::string_view +#include // std::move + +namespace { + +struct JSONLDExpandCase { /* Everything TestBody needs for one manifest entry; filled in by register_case. */ + std::filesystem::path suite_root; /* directory that holds the manifest and the files it references */ + sourcemeta::core::JSON::String base_prefix; /* manifest baseIri; the resolver serves identifiers that start with it from suite_root */ + std::filesystem::path input; + std::filesystem::path expect; /* assigned only for positive cases */ + sourcemeta::core::JSON::String error_code; /* expectErrorCode from the manifest; assigned only for negative cases */ + sourcemeta::core::JSON::String base_iri; + sourcemeta::core::JSONLDVersion version; /* NOTE(review): no default member initializer; register_case assigns it before use */ + bool negative; /* NOTE(review): no default member initializer; register_case assigns it before use */ + std::optional expand_context; /* path taken from option.expandContext when the entry has one */ +}; + +class JSONLDExpandTest : public testing::Test { /* Fixture created by the factory handed to testing::RegisterTest, one instance per manifest entry. */ +public: + explicit JSONLDExpandTest(JSONLDExpandCase test_case) + : test_case_{std::move(test_case)} {} + + auto TestBody() -> void override { /* Expands the case input and compares against either the expected document or the expected error code. The resolver maps identifiers under base_prefix to files under suite_root and yields std::nullopt for other identifiers and for files that do not exist. */ + const auto &test_case{this->test_case_}; + const sourcemeta::core::JSONLDResolver resolver = + [&test_case](const sourcemeta::core::JSON::StringView identifier) + -> std::optional { + if (!identifier.starts_with(test_case.base_prefix)) { + return std::nullopt; + } + // Dereferencing a context IRI ignores any fragment or query component.
+ const auto suffix{identifier.substr(test_case.base_prefix.size())}; + const auto path{test_case.suite_root / + suffix.substr(0, suffix.find_first_of("#?"))}; + if (!std::filesystem::exists(path)) { + return std::nullopt; + } + return sourcemeta::core::read_json(path); + }; + + const auto input{sourcemeta::core::read_json(test_case.input)}; + + if (test_case.negative) { /* Negative case: expansion has to throw, so the result is discarded. */ + try { + if (test_case.expand_context.has_value()) { + const auto context{ + sourcemeta::core::read_json(test_case.expand_context.value())}; + static_cast(sourcemeta::core::jsonld_expand( + input, context, test_case.base_iri, resolver, test_case.version)); + } else { + static_cast(sourcemeta::core::jsonld_expand( + input, test_case.base_iri, resolver, test_case.version)); + } + FAIL() << "Expected error code: " << test_case.error_code; + } catch (const sourcemeta::core::JSONLDError &error) { /* Only JSONLDError is caught here; any other exception type leaves TestBody. */ + // The implementation capitalises the first letter of every error code, + // whereas the upstream suite expresses them in lower case. + std::string actual_code{error.what()}; + if (!actual_code.empty()) { + actual_code.front() = + sourcemeta::core::to_lowercase(actual_code.front()); + } + EXPECT_EQ(static_cast(test_case.error_code), + actual_code); + } + } else { /* Positive case: the expansion result has to equal the expected document read from disk. */ + const auto expected{sourcemeta::core::read_json(test_case.expect)}; + if (test_case.expand_context.has_value()) { + const auto context{ + sourcemeta::core::read_json(test_case.expand_context.value())}; + EXPECT_EQ(sourcemeta::core::jsonld_expand(input, context, + test_case.base_iri, resolver, + test_case.version), + expected); + } else { + EXPECT_EQ(sourcemeta::core::jsonld_expand(input, test_case.base_iri, + resolver, test_case.version), + expected); + } + } + } + +private: + JSONLDExpandCase test_case_; +}; + +auto sanitize(const std::string_view identifier) -> std::string { /* Turns a manifest @id into a test name: number signs are skipped and every other character that is_alphanum rejects becomes an underscore. */ + std::string result; + for (const auto character : identifier) { + if (character == '#') { + continue; + } + result += sourcemeta::core::is_alphanum(character) ?
character : '_'; + } + return result; +} + +auto register_case(const sourcemeta::core::JSON &entry, + const std::filesystem::path &suite_root, + const sourcemeta::core::JSON::String &base_prefix) -> void { /* Builds a JSONLDExpandCase from one manifest entry and registers it under the JSONLD_expand suite. An entry is negative when its @type array contains jld:NegativeEvaluationTest. */ + bool negative{false}; + for (const auto &type : entry.at("@type").as_array()) { + if (type.to_string() == "jld:NegativeEvaluationTest") { + negative = true; + } + } + + const auto &input_relative{entry.at("input").to_string()}; + + JSONLDExpandCase test_case; + test_case.suite_root = suite_root; + test_case.base_prefix = base_prefix; + test_case.input = suite_root / input_relative; + test_case.base_iri = base_prefix + input_relative; /* default; option.base replaces it below when present */ + test_case.version = sourcemeta::core::JSONLDVersion::V1_1; /* default; switched to V1_0 below when specVersion or processingMode says json-ld-1.0 */ + test_case.negative = negative; + + if (entry.defines("option")) { + const auto &option{entry.at("option")}; + if (option.defines("base")) { + test_case.base_iri = option.at("base").to_string(); + } + if ((option.defines("specVersion") && + option.at("specVersion").to_string() == "json-ld-1.0") || + (option.defines("processingMode") && + option.at("processingMode").to_string() == "json-ld-1.0")) { + test_case.version = sourcemeta::core::JSONLDVersion::V1_0; + } + if (option.defines("expandContext")) { + test_case.expand_context = + suite_root / option.at("expandContext").to_string(); + } + } + + if (negative) { + test_case.error_code = entry.at("expectErrorCode").to_string(); + } else { + test_case.expect = suite_root / entry.at("expect").to_string(); + } + + testing::RegisterTest( + "JSONLD_expand", sanitize(entry.at("@id").to_string()).c_str(), nullptr, + nullptr, __FILE__, __LINE__, [test_case]() -> testing::Test * { /* The factory owns a copy of test_case and builds a fresh fixture from it. Every case is registered with this same file and line. */ + return new JSONLDExpandTest(test_case); + }); +} + +} // namespace + +auto main(int argc, char **argv) -> int { /* Reads expand-manifest.jsonld under JSONLD_SUITE_PATH and registers one test per entry of its sequence array before RUN_ALL_TESTS() runs them. */ + testing::InitGoogleTest(&argc, argv); + + const std::filesystem::path suite_root{JSONLD_SUITE_PATH}; + const auto manifest{ + sourcemeta::core::read_json(suite_root / "expand-manifest.jsonld")}; + const auto
&base_prefix{manifest.at("baseIri").to_string()}; + + for (const auto &entry : manifest.at("sequence").as_array()) { + register_case(entry, suite_root, base_prefix); + } + + return RUN_ALL_TESTS(); +} diff --git a/test/packaging/find_package/CMakeLists.txt b/test/packaging/find_package/CMakeLists.txt index a79a1fc21..bb01ddc8e 100644 --- a/test/packaging/find_package/CMakeLists.txt +++ b/test/packaging/find_package/CMakeLists.txt @@ -23,6 +23,7 @@ target_link_libraries(core_hello PRIVATE sourcemeta::core::uri) target_link_libraries(core_hello PRIVATE sourcemeta::core::uritemplate) target_link_libraries(core_hello PRIVATE sourcemeta::core::json) target_link_libraries(core_hello PRIVATE sourcemeta::core::jsonpointer) +target_link_libraries(core_hello PRIVATE sourcemeta::core::jsonld) target_link_libraries(core_hello PRIVATE sourcemeta::core::jsonl) target_link_libraries(core_hello PRIVATE sourcemeta::core::yaml) target_link_libraries(core_hello PRIVATE sourcemeta::core::gzip) diff --git a/test/packaging/find_package/hello.cc b/test/packaging/find_package/hello.cc index 8dd59c4be..5b8672aa5 100644 --- a/test/packaging/find_package/hello.cc +++ b/test/packaging/find_package/hello.cc @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include