Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions regression/cbmc/xml-nonprintable-chars/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#include <assert.h>

int main(void)
{
const unsigned char *str = (unsigned char *)"\x00";
assert(*str == '\x01');
return 0;
}
13 changes: 13 additions & 0 deletions regression/cbmc/xml-nonprintable-chars/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
CORE
main.c
--trace --xml-ui
^EXIT=10$
^SIGNAL=0$
VERIFICATION FAILED
--
XML does not support escaping non-printable character
--
Test that cbmc with --trace --xml-ui handles non-printable characters
gracefully without crashing when they appear in counterexample traces.
This test verifies that the fix for character 0x01 allows XML generation
to complete successfully by escaping them using C-style sequences (e.g. \0, \x01).
51 changes: 36 additions & 15 deletions src/util/xml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@ Author: Daniel Kroening, kroening@kroening.com

#include "xml.h"

#include <ostream>

#include "exception_utils.h"
#include "string2int.h"
#include "structured_data.h"

#include <iomanip>
#include <ostream>

void xmlt::clear()
{
data.clear();
Expand Down Expand Up @@ -75,6 +76,34 @@ void xmlt::output(std::ostream &out, unsigned indent) const
out << '<' << '/' << name << '>' << "\n";
}

/// Escape a character that is not valid in XML 1.0. Characters below 0x20
/// (other than TAB, LF, CR) and DEL (0x7F) are not valid in XML 1.0, not
/// even as numeric character references. Encode them as C-style escape
/// sequences with fixed-width two-digit hex. Backslashes are escaped to
/// avoid ambiguity.
/// \return true if the character was handled, false if it should be emitted
/// as-is
static bool escape_non_printable(char ch, std::ostream &out)
{
if(ch == '\0')
{
out << "\\0";
return true;
}
else if(static_cast<unsigned char>(ch) < 32u || ch == 0x7F)
{
out << "\\x" << std::hex << std::setfill('0') << std::setw(2)
<< (unsigned int)(unsigned char)ch << std::dec;
return true;
}
else if(ch == '\\')
{
out << "\\\\";
return true;
}
return false;
}

/// escaping for XML elements
void xmlt::escape(const std::string &s, std::ostream &out)
{
Expand All @@ -101,17 +130,13 @@ void xmlt::escape(const std::string &s, std::ostream &out)
out << '\n';
break;

case 0x9: // TAB
case 0x7F: // DEL
case 0x9: // TAB
out << "&#" << std::to_string((unsigned char)ch) << ';';
break;

default:
DATA_INVARIANT(
static_cast<unsigned char>(ch) >= 32u,
"XML does not support escaping non-printable character " +
std::to_string((unsigned char)ch));
out << ch;
if(!escape_non_printable(ch, out))
out << ch;
}
}
}
Expand Down Expand Up @@ -143,16 +168,12 @@ void xmlt::escape_attribute(const std::string &s, std::ostream &out)
case 0x9: // TAB
case 0xA: // LF
case 0xD: // CR
case 0x7F: // DEL
out << "&#" << std::to_string((unsigned char)ch) << ';';
break;

default:
DATA_INVARIANT(
static_cast<unsigned char>(ch) >= 32u,
"XML does not support escaping non-printable character " +
std::to_string((unsigned char)ch));
out << ch;
if(!escape_non_printable(ch, out))
out << ch;
}
}
}
Expand Down
77 changes: 76 additions & 1 deletion unit/util/xml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@ Author: Thomas Kiley

\*******************************************************************/

#include <testing-utils/use_catch.h>
#include <util/xml.h>

#include <testing-utils/use_catch.h>

#include <sstream>

TEST_CASE("xml_equal", "[core][util][xml]")
{
SECTION("Empty xml")
Expand Down Expand Up @@ -97,3 +100,75 @@ TEST_CASE("xml_equal", "[core][util][xml]")
}
}
}

TEST_CASE("xml_escape_nonprintable", "[core][util][xml]")
{
SECTION("Escaping non-printable characters in attributes")
{
xmlt node{"test"};
node.set_attribute("value", std::string("\x00\x01\x02\x1F", 4));

std::ostringstream out;
node.output(out);

std::string result = out.str();
// Characters invalid in XML 1.0 are encoded as C-style escapes
REQUIRE(result.find("\\0") != std::string::npos);
REQUIRE(result.find("\\x01") != std::string::npos);
REQUIRE(result.find("\\x02") != std::string::npos);
REQUIRE(result.find("\\x1f") != std::string::npos);
Comment thread
tautschnig marked this conversation as resolved.
}

SECTION("Escaping non-printable characters in data")
{
xmlt node{"test"};
node.data = std::string("\x00\x01\x02\x1F", 4);

std::ostringstream out;
node.output(out);

std::string result = out.str();
// Characters invalid in XML 1.0 are encoded as C-style escapes
REQUIRE(result.find("\\0") != std::string::npos);
REQUIRE(result.find("\\x01") != std::string::npos);
REQUIRE(result.find("\\x02") != std::string::npos);
REQUIRE(result.find("\\x1f") != std::string::npos);
}

SECTION("Valid XML 1.0 control characters use numeric references")
{
xmlt node{"test"};
node.set_attribute("value", std::string("\x09", 1));

std::ostringstream out;
node.output(out);

std::string result = out.str();
// TAB is valid in XML 1.0 and uses numeric character reference
REQUIRE(result.find("&#9;") != std::string::npos);
}

SECTION("Backslashes are escaped")
{
xmlt node{"test"};
node.set_attribute("value", "back\\slash");

std::ostringstream out;
node.output(out);

std::string result = out.str();
REQUIRE(result.find("back\\\\slash") != std::string::npos);
}

SECTION("Standard printable characters unchanged")
{
xmlt node{"test"};
node.set_attribute("value", "Hello World!");

std::ostringstream out;
node.output(out);

std::string result = out.str();
REQUIRE(result.find("Hello World!") != std::string::npos);
}
}
Loading