Skip to content

Commit a440eb6

Browse files
authored
AVRO-4175: [C++] Allow previously parsed schemas to be referenced when parsing a schema (#3475)
1 parent 05ba36f commit a440eb6

3 files changed

Lines changed: 96 additions & 1 deletion

File tree

lang/c++/impl/Compiler.cc

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,15 @@ static NodePtr makeNode(const string &t, SymbolTable &st, const string &ns) {
9494

9595
auto it = st.find(n);
9696
if (it != st.end()) {
97-
return NodePtr(new NodeSymbolic(asSingleAttribute(n), it->second));
97+
// Return the raw NodePtr instead of creating a new "NodeSymbolic"
98+
// via "NodePtr(new NodeSymbolic(asSingleAttribute(n), it->second))"
99+
// in order to support externally resolved named references.
100+
// This is safe because the validator canonicalizes duplicates:
101+
// when it sees the same named node again (including self-recursion),
102+
// it replaces that leaf with a NodeSymbolic via "setLeafToSymbolic".
103+
// So even if the raw NodePtr is returned initially, validation
104+
// converts repeats to symbolic links.
105+
return it->second;
98106
}
99107
throw Exception("Unknown type: {}", n);
100108
}
@@ -638,4 +646,23 @@ AVRO_DECL bool compileJsonSchema(std::istream &is, ValidSchema &schema, string &
638646
}
639647
}
640648

649+
/// Compiles the JSON schema read from \p is into a ValidSchema. Type names
/// that the schema does not define itself are looked up among the roots of
/// the already-compiled schemas in \p namedReferences.
///
/// Throws Exception if \p is is not in a good state on entry.
AVRO_DECL ValidSchema compileJsonSchemaWithNamedReferences(std::istream &is,
                                                           const std::map<Name, ValidSchema> &namedReferences) {
    if (!is.good()) {
        throw Exception("Input stream is not good");
    }

    // Parse the JSON text first; the symbol table is only needed once a
    // well-formed entity is in hand.
    std::unique_ptr<InputStream> input = istreamInputStream(is);
    json::Entity parsed = json::loadEntity(*input);

    // Pre-populate the symbol table with the root node of every supplied
    // schema so that makeNode can resolve references to them by name.
    SymbolTable symbols;
    for (auto ref = namedReferences.begin(); ref != namedReferences.end(); ++ref) {
        symbols[ref->first] = ref->second.root();
    }

    NodePtr root = makeNode(parsed, symbols, "");
    return ValidSchema(root);
}
667+
641668
} // namespace avro

lang/c++/include/avro/Compiler.hh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "Config.hh"
2323
#include <cstdint>
2424
#include <istream>
25+
#include <map>
2526

2627
namespace avro {
2728

@@ -32,6 +33,7 @@ class AVRO_DECL InputStream;
3233
/// lexer object for each parse. The bison parser also uses this class to
3334
/// build up an avro parse tree as the avro spec is parsed.
3435

36+
class AVRO_DECL Name;
3537
class AVRO_DECL ValidSchema;
3638

3739
/// Given a stream containing a JSON schema, compiles the schema to a
@@ -58,6 +60,9 @@ AVRO_DECL ValidSchema compileJsonSchemaFromString(const std::string &input);
5860

5961
AVRO_DECL ValidSchema compileJsonSchemaFromFile(const char *filename);
6062

63+
/// Given a stream containing a JSON schema, compiles the schema to a
/// ValidSchema object. Named types that the schema refers to but does not
/// define are resolved against the root nodes of the previously compiled
/// schemas in \p namedReferences, keyed by their Name.
/// Throws if the input stream is not in a good state.
AVRO_DECL ValidSchema compileJsonSchemaWithNamedReferences(std::istream &is,
64+
const std::map<Name, ValidSchema> &namedReferences);
65+
6166
} // namespace avro
6267

6368
#endif

lang/c++/test/CompilerTests.cc

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <boost/test/unit_test.hpp>
2323

2424
#include "Compiler.hh"
25+
#include "Node.hh"
2526
#include "ValidSchema.hh"
2627

2728
// Assert that empty defaults don't make json schema compilation violate bounds
@@ -82,12 +83,74 @@ void test2dArray() {
8283
BOOST_CHECK_EQUAL(expected, actual.str());
8384
}
8485

86+
void testRecordWithNamedReference() {
87+
std::string nestedSchema = "{\"name\":\"NestedRecord\",\"type\":\"record\",\"fields\":[{\"name\":\"stringField\",\"type\":\"string\"}]}";
88+
// The root schema references the nested schema above by name only.
89+
// This mimics tools that allow schemas to have references to other schemas.
90+
std::string rootSchema = "{\"name\":\"RootRecord\",\"type\":\"record\",\"fields\":[{\"name\": \"nestedField\",\"type\":\"NestedRecord\"}]}";
91+
92+
// First compile the nested schema
93+
avro::ValidSchema nestedRecord = avro::compileJsonSchemaFromString(nestedSchema);
94+
95+
// Create a map of named references
96+
std::map<avro::Name, avro::ValidSchema> namedReferences;
97+
namedReferences[avro::Name("NestedRecord")] = nestedRecord;
98+
99+
// Parse the root schema with named references
100+
std::istringstream rootSchemaStream(rootSchema);
101+
avro::ValidSchema rootRecord = avro::compileJsonSchemaWithNamedReferences(rootSchemaStream, namedReferences);
102+
103+
// Verify the schema was compiled correctly
104+
BOOST_CHECK_EQUAL("RootRecord", rootRecord.root()->name().simpleName());
105+
106+
// Get the nested field and verify its type
107+
const avro::NodePtr &rootNode = rootRecord.root();
108+
BOOST_CHECK_EQUAL(avro::AVRO_RECORD, rootNode->type());
109+
BOOST_CHECK_EQUAL(1, rootNode->leaves());
110+
111+
const avro::NodePtr &nestedFieldNode = rootNode->leafAt(0);
112+
BOOST_CHECK_EQUAL("NestedRecord", nestedFieldNode->name().simpleName());
113+
}
114+
115+
// Verify recursive schemas don't create shared_ptr cycles by ensuring the
116+
// root node expires once the ValidSchema goes out of scope. Example: binary
117+
// tree node with left/right as union of null and the node type itself.
118+
void testRecursiveBinaryTreeWeakPtrExpires() {
119+
std::weak_ptr<avro::Node> weakRoot;
120+
121+
{
122+
const std::string schema = R"({
123+
"type": "record",
124+
"name": "Node",
125+
"fields": [
126+
{"name": "value", "type": "int"},
127+
{"name": "left", "type": ["null", "Node"], "default": null},
128+
{"name": "right", "type": ["null", "Node"], "default": null}
129+
]
130+
})";
131+
132+
avro::ValidSchema s = avro::compileJsonSchemaFromString(schema);
133+
// Capture a weak reference to the root node while the schema is alive.
134+
weakRoot = s.root();
135+
136+
// Optionally exercise the schema to ensure validation completed.
137+
BOOST_CHECK_EQUAL(avro::AVRO_RECORD, s.root()->type());
138+
BOOST_CHECK_EQUAL("Node", s.root()->name().simpleName());
139+
}
140+
141+
// After the ValidSchema (and any strong references) go out of scope,
142+
// the weak pointer must not be lockable if there are no cycles.
143+
BOOST_CHECK(weakRoot.expired());
144+
}
145+
85146
// Boost.Test entry point: assembles the suite holding every test case
// defined for Compiler.cc and hands it back to the framework.
boost::unit_test::test_suite *
init_unit_test_suite(int /*argc*/, char * /*argv*/[]) {
    using namespace boost::unit_test;

    test_suite *suite = BOOST_TEST_SUITE("Avro C++ unit tests for Compiler.cc");

    // Registration order determines execution order.
    suite->add(BOOST_TEST_CASE(&testEmptyBytesDefault));
    suite->add(BOOST_TEST_CASE(&test2dArray));
    suite->add(BOOST_TEST_CASE(&testRecordWithNamedReference));
    suite->add(BOOST_TEST_CASE(&testRecursiveBinaryTreeWeakPtrExpires));

    return suite;
}

0 commit comments

Comments
 (0)