diff --git a/src/XMLDocument.h b/src/XMLDocument.h index 36e8c280c..7129adf78 100644 --- a/src/XMLDocument.h +++ b/src/XMLDocument.h @@ -326,19 +326,42 @@ struct XMLDocument: public unique_free_d, public XMLNode } }, nullptr, &f, XML_CHAR_ENCODING_NONE)); ctxt->options |= XML_PARSE_NOENT|XML_PARSE_DTDLOAD|XML_PARSE_DTDATTR|XML_PARSE_NONET|XML_PARSE_NODICT; + if(hugeFile) + ctxt->options |= XML_PARSE_HUGE; #if LIBXML_VERSION >= 21300 ctxt->options |= XML_PARSE_NO_XXE; #else ctxt->loadsubset |= XML_DETECT_IDS|XML_COMPLETE_ATTRS; -#endif - if(hugeFile) - { - ctxt->options |= XML_PARSE_HUGE; -#if LIBXML_VERSION < 21300 - if(ctxt->sax) - ctxt->sax->entityDecl = nullptr; -#endif + if(ctxt->sax) { + ctxt->sax->entityDecl = [](void *ctx, const xmlChar *name, int type, + const xmlChar *publicId, const xmlChar *systemId, + xmlChar *content) noexcept { + auto *ctxt = static_cast(ctx); + auto *doc = ctxt->myDoc; + if(!doc) return; + if((ctxt->options & XML_PARSE_HUGE) && + (type == XML_INTERNAL_GENERAL_ENTITY || type == XML_INTERNAL_PARAMETER_ENTITY)) { + ctxt->wellFormed = 0; + xmlStopParser(ctxt); + return; + } + if(type == XML_EXTERNAL_GENERAL_PARSED_ENTITY) + xmlAddDocEntity(doc, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr, (const xmlChar*)""); + else + xmlAddDocEntity(doc, name, type, publicId, systemId, content); + }; + ctxt->sax->resolveEntity = [](void*, const xmlChar*, const xmlChar*) noexcept -> xmlParserInputPtr { + return nullptr; + }; + ctxt->sax->getEntity = [](void *ctx, const xmlChar *name) noexcept -> xmlEntityPtr { + auto *ctxt = static_cast(ctx); + auto *ent = xmlGetDocEntity(ctxt->myDoc, name); + if(!ent && ctxt->myDoc) + ent = xmlAddDocEntity(ctxt->myDoc, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr, (const xmlChar*)""); + return ent; + }; } +#endif auto result = xmlParseDocument(ctxt.get()); if(result != 0 || !ctxt->wellFormed) { diff --git a/test/data/xxe-sentinel-dtd.dtd b/test/data/xxe-sentinel-dtd.dtd new file mode 100644 index 000000000..e821c9ebc --- /dev/null +++ b/test/data/xxe-sentinel-dtd.dtd @@ -0,0 +1 @@ + diff --git a/test/data/xxe-sentinel.txt b/test/data/xxe-sentinel.txt new file mode 100644 index 000000000..6ffa85cd3 --- /dev/null +++ b/test/data/xxe-sentinel.txt @@ -0,0 +1 @@ +XXESENTINEL diff --git a/test/libdigidocpp_boost.cpp b/test/libdigidocpp_boost.cpp index d449ef5e4..22a5d1475 100644 --- a/test/libdigidocpp_boost.cpp +++ b/test/libdigidocpp_boost.cpp @@ -616,4 +616,54 @@ BOOST_AUTO_TEST_CASE(XMLBomb) if(std::fstream f{"xml-bomb-cont.xml"}) BOOST_CHECK_THROW(XMLDocument::openStream(f, {}, true), Exception); } +BOOST_AUTO_TEST_CASE(XMLXXE) +{ + BOOST_REQUIRE(fs::exists("xxe-sentinel.txt")); + BOOST_REQUIRE(fs::exists("xxe-sentinel-dtd.dtd")); + + // Absolute file:// URIs so the entity loader can resolve them even from a + // stream context (which has no base URL). Without protection these files + // would be loaded and XXESENTINEL would appear in the document; with + // protection they are blocked before any file access occurs. + auto fileUri = [](const char *name) { + auto s = fs::absolute(name).generic_string(); + return s.front() == '/' ? "file://" + s : "file:///" + s; + }; + const auto sentinelUri = fileUri("xxe-sentinel.txt"); + const auto dtdUri = fileUri("xxe-sentinel-dtd.dtd"); + + // Each payload would leak XXESENTINEL into if the relevant XXE + // vector is not blocked. + const std::string payloads[] = { + // General entity via local file + "" + "]>" + "&xxe;", + // General entity via network (blocked by XML_PARSE_NONET) + "" + "]>" + "&xxe;", + // External DTD subset that defines &sentinel; as XXESENTINEL + "" + "" + "&sentinel;", + // Parameter entity that injects the DTD to define &sentinel; + "" + "" + "%sentinel;" + "]>&sentinel;", + }; + + for(const auto &payload : payloads) { + for(bool huge : {false, true}) { + std::istringstream is{std::string(payload)}; + auto doc = XMLDocument::openStream(is, {}, huge); + xmlChar *raw = xmlNodeGetContent(doc.d); + std::string_view text = raw ? (const char*)raw : ""; + BOOST_CHECK(!text.contains("XXESENTINEL")); + xmlFree(raw); + } + } +} BOOST_AUTO_TEST_SUITE_END()