Skip to content

Commit 5e27319

Browse files
authored
Data declaration extensions (#216)
* Extend column * CSVColumn -> GNFColumn * IcebergRelation -> IcebergData * Simplify grammar and rename csv_column -> gnf_column * RelEDB -> EDB * Update SDKs * Generalize Snapshot action * Remove stray binary
1 parent 2f78895 commit 5e27319

38 files changed

Lines changed: 11439 additions & 10776 deletions

meta/src/meta/grammar.y

Lines changed: 45 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,9 @@
7171
%nonterm construct logic.Construct
7272
%nonterm context transactions.Context
7373
%nonterm csv_asof String
74-
%nonterm csv_column logic.CSVColumn
75-
%nonterm csv_columns Sequence[logic.CSVColumn]
74+
%nonterm gnf_column logic.GNFColumn
75+
%nonterm gnf_column_path Sequence[String]
76+
%nonterm gnf_columns Sequence[logic.GNFColumn]
7677
%nonterm csv_config logic.CSVConfig
7778
%nonterm csv_data logic.CSVData
7879
%nonterm csv_locator_inline_data String
@@ -137,16 +138,17 @@
137138
%nonterm read transactions.Read
138139
%nonterm reduce logic.Reduce
139140
%nonterm rel_atom logic.RelAtom
140-
%nonterm rel_edb logic.RelEDB
141-
%nonterm rel_edb_path Sequence[String]
142-
%nonterm rel_edb_types Sequence[logic.Type]
141+
%nonterm edb logic.EDB
142+
%nonterm edb_path Sequence[String]
143+
%nonterm edb_types Sequence[logic.Type]
143144
%nonterm rel_term logic.RelTerm
144145
%nonterm relation_id logic.RelationId
145146
%nonterm script logic.Script
146147
%nonterm specialized_value logic.Value
147148
%nonterm string_type logic.StringType
148149
%nonterm sum_monoid logic.SumMonoid
149150
%nonterm snapshot transactions.Snapshot
151+
%nonterm snapshot_mapping transactions.SnapshotMapping
150152
%nonterm sync transactions.Sync
151153
%nonterm term logic.Term
152154
%nonterm terms Sequence[logic.Term]
@@ -899,10 +901,10 @@ functional_dependency_values
899901
: "(" "values" var* ")"
900902

901903
data
902-
: rel_edb
903-
construct: $$ = logic.Data(rel_edb=$1)
904-
deconstruct if builtin.has_proto_field($$, 'rel_edb'):
905-
$1: logic.RelEDB = $$.rel_edb
904+
: edb
905+
construct: $$ = logic.Data(edb=$1)
906+
deconstruct if builtin.has_proto_field($$, 'edb'):
907+
$1: logic.EDB = $$.edb
906908
| betree_relation
907909
construct: $$ = logic.Data(betree_relation=$1)
908910
deconstruct if builtin.has_proto_field($$, 'betree_relation'):
@@ -912,15 +914,15 @@ data
912914
deconstruct if builtin.has_proto_field($$, 'csv_data'):
913915
$1: logic.CSVData = $$.csv_data
914916

915-
rel_edb_path
917+
edb_path
916918
: "[" STRING* "]"
917919

918-
rel_edb_types
920+
edb_types
919921
: "[" type* "]"
920922

921-
rel_edb
922-
: "(" "rel_edb" relation_id rel_edb_path rel_edb_types ")"
923-
construct: $$ = logic.RelEDB(target_id=$3, path=$4, types=$5)
923+
edb
924+
: "(" "edb" relation_id edb_path edb_types ")"
925+
construct: $$ = logic.EDB(target_id=$3, path=$4, types=$5)
924926
deconstruct:
925927
$3: logic.RelationId = $$.target_id
926928
$4: Sequence[String] = $$.path
@@ -947,19 +949,19 @@ betree_info_key_types
947949
betree_info_value_types
948950
: "(" "value_types" type* ")"
949951

950-
csv_columns
951-
: "(" "columns" csv_column* ")"
952+
gnf_columns
953+
: "(" "columns" gnf_column* ")"
952954

953955
csv_asof
954956
: "(" "asof" STRING ")"
955957

956958
csv_data
957-
: "(" "csv_data" csvlocator csv_config csv_columns csv_asof ")"
959+
: "(" "csv_data" csvlocator csv_config gnf_columns csv_asof ")"
958960
construct: $$ = logic.CSVData(locator=$3, config=$4, columns=$5, asof=$6)
959961
deconstruct:
960962
$3: logic.CSVLocator = $$.locator
961963
$4: logic.CSVConfig = $$.config
962-
$5: Sequence[logic.CSVColumn] = $$.columns
964+
$5: Sequence[logic.GNFColumn] = $$.columns
963965
$6: String = $$.asof
964966

965967
csv_locator_paths
@@ -980,12 +982,22 @@ csv_config
980982
construct: $$ = construct_csv_config($3)
981983
deconstruct: $3: Sequence[Tuple[String, logic.Value]] = deconstruct_csv_config($$)
982984

983-
csv_column
984-
: "(" "column" STRING relation_id "[" type* "]" ")"
985-
construct: $$ = logic.CSVColumn(column_name=$3, target_id=$4, types=$6)
985+
gnf_column_path
986+
: STRING
987+
construct: $$ = [$1]
988+
deconstruct if builtin.length($$) == 1:
989+
$1: String = $$[0]
990+
| "[" STRING* "]"
991+
construct: $$ = $2
992+
deconstruct if builtin.length($$) != 1:
993+
$2: Sequence[String] = $$
994+
995+
gnf_column
996+
: "(" "column" gnf_column_path relation_id? "[" type* "]" ")"
997+
construct: $$ = logic.GNFColumn(column_path=$3, target_id=$4, types=$6)
986998
deconstruct:
987-
$3: String = $$.column_name
988-
$4: logic.RelationId = $$.target_id
999+
$3: Sequence[String] = $$.column_path
1000+
$4: Optional[logic.RelationId] = $$.target_id if builtin.has_proto_field($$, "target_id") else None
9891001
$6: Sequence[logic.Type] = $$.types
9901002

9911003
undefine
@@ -998,12 +1010,17 @@ context
9981010
construct: $$ = transactions.Context(relations=$3)
9991011
deconstruct: $3: Sequence[logic.RelationId] = $$.relations
10001012

1001-
snapshot
1002-
: "(" "snapshot" rel_edb_path relation_id ")"
1003-
construct: $$ = transactions.Snapshot(destination_path=$3, source_relation=$4)
1013+
snapshot_mapping
1014+
: edb_path relation_id
1015+
construct: $$ = transactions.SnapshotMapping(destination_path=$1, source_relation=$2)
10041016
deconstruct:
1005-
$3: Sequence[String] = $$.destination_path
1006-
$4: logic.RelationId = $$.source_relation
1017+
$1: Sequence[String] = $$.destination_path
1018+
$2: logic.RelationId = $$.source_relation
1019+
1020+
snapshot
1021+
: "(" "snapshot" snapshot_mapping* ")"
1022+
construct: $$ = transactions.Snapshot(mappings=$3)
1023+
deconstruct: $3: Sequence[transactions.SnapshotMapping] = $$.mappings
10071024

10081025
epoch_reads
10091026
: "(" "reads" read* ")"

meta/src/meta/yacc_action_parser.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -169,15 +169,15 @@ def _infer_type(
169169
elif isinstance(expr, ListExpr):
170170
return ListType(expr.element_type)
171171
elif isinstance(expr, GetElement):
172-
# Infer tuple element type
173-
tuple_type = _infer_type(expr.tuple_expr, line, ctx)
174-
if isinstance(tuple_type, TupleType) and 0 <= expr.index < len(
175-
tuple_type.elements
172+
# Infer element type from tuple or sequence/list indexing
173+
container_type = _infer_type(expr.tuple_expr, line, ctx)
174+
if isinstance(container_type, TupleType) and 0 <= expr.index < len(
175+
container_type.elements
176176
):
177-
return tuple_type.elements[expr.index]
178-
raise YaccGrammarError(
179-
f"Cannot infer type of tuple element access: {expr}", line
180-
)
177+
return container_type.elements[expr.index]
178+
if isinstance(container_type, (SequenceType, ListType)):
179+
return container_type.element_type
180+
raise YaccGrammarError(f"Cannot infer type of element access: {expr}", line)
181181
elif isinstance(expr, GetField):
182182
# GetField has field_type from proto schema lookup (or Unknown if not found)
183183
return expr.field_type

proto/relationalai/lqp/v1/logic.proto

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -232,14 +232,14 @@ message Attribute {
232232
//
233233
message Data {
234234
oneof data_type {
235-
RelEDB rel_edb = 1;
235+
EDB edb = 1;
236236
BeTreeRelation betree_relation = 2;
237237
CSVData csv_data = 3;
238-
// IcebergRelation iceberg_relation = 4;
238+
// IcebergData iceberg_data = 4;
239239
}
240240
}
241241

242-
message RelEDB {
242+
message EDB {
243243
RelationId target_id = 1;
244244
repeated string path = 2;
245245
repeated Type types = 3;
@@ -277,7 +277,7 @@ message BeTreeLocator {
277277
message CSVData {
278278
CSVLocator locator = 1;
279279
CSVConfig config = 2;
280-
repeated CSVColumn columns = 3;
280+
repeated GNFColumn columns = 3;
281281
string asof = 4; // Blob storage timestamp for freshness requirements
282282
}
283283

@@ -311,9 +311,9 @@ message CSVConfig {
311311
string compression = 11; // "none", "gzip", "zstd", "auto" (default: "auto")
312312
}
313313

314-
message CSVColumn {
315-
string column_name = 1; // Name in CSV file
316-
RelationId target_id = 2; // Target relation
314+
message GNFColumn {
315+
repeated string column_path = 1; // Column identifier path (was: string column_name)
316+
optional RelationId target_id = 2; // Target relation (now explicit optional)
317317
repeated Type types = 3; // Relation signature (key types + value types)
318318
}
319319

proto/relationalai/lqp/v1/transactions.proto

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,18 @@ message Context {
6464
repeated RelationId relations = 1;
6565
}
6666

67-
// Demand the source IDB, take an immutable snapshot, and turn it into an EDB under the
68-
// given path (specified as a sequence of strings, see RelEDB).
69-
message Snapshot {
67+
// A single (destination, source) pair within a Snapshot action.
68+
message SnapshotMapping {
7069
repeated string destination_path = 1;
7170
RelationId source_relation = 2;
7271
}
7372

73+
// Demand the source IDBs, take immutable snapshots, and turn them into EDBs under the
74+
// given paths (specified as sequences of strings, see EDB).
75+
message Snapshot {
76+
repeated SnapshotMapping mappings = 1;
77+
}
78+
7479
//
7580
// Export config
7681
//

sdks/go/src/lqp/v1/fragments.pb.go

Lines changed: 6 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)