Skip to content

Commit d937ce8

Browse files
committed
(improvement) metadata: single-pass column classification in _build_table_columns
Rewrite _build_table_columns to classify columns by kind in a single pass, instead of iterating over col_rows three times with list comprehensions.

This also fixes a bug: the third pass filtered on 'clustering_key' instead of 'clustering', so clustering columns leaked through and were re-processed as regular columns.

Additionally, use in-place sort() instead of sorted() to avoid creating intermediate list copies, and append the already-built column_meta object to partition_key/clustering_key instead of looking it up again in meta.columns by name.

Combined benchmark results for the full optimization series (A-F):
- Row creation + access: 323 ns/row vs 485 ns/row (1.50x faster)
- _build_table_columns: 9.0 us/table vs 9.9 us/table (1.10x faster)
- Full pipeline (100 tables x 20 cols): 0.79 ms vs 1.57 ms (1.98x faster)
- Memory per row: 48 bytes vs 272 bytes (5.7x reduction)
- __slots__ per instance: 80 bytes (saves ~104 bytes of __dict__ overhead)
1 parent 40ccd3d commit d937ce8

1 file changed

Lines changed: 23 additions & 14 deletions

File tree

cassandra/metadata.py

Lines changed: 23 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -2673,31 +2673,40 @@ def _build_table_options(self, row):
26732673
return dict((o, row.get(o)) for o in self.recognized_table_options if o in row)
26742674

26752675
def _build_table_columns(self, meta, col_rows, compact_static=False, is_dense=False, virtual=False):
2676-
# partition key
2677-
partition_rows = [r for r in col_rows
2678-
if r.get('kind', None) == "partition_key"]
2676+
# Single-pass classification of column rows by kind
2677+
partition_rows = []
2678+
clustering_rows = []
2679+
other_rows = []
2680+
for r in col_rows:
2681+
kind = r.get('kind', None)
2682+
if kind == "partition_key":
2683+
partition_rows.append(r)
2684+
elif kind == "clustering":
2685+
if not compact_static:
2686+
clustering_rows.append(r)
2687+
# else: skip clustering rows entirely for compact_static tables
2688+
else:
2689+
other_rows.append(r)
2690+
2691+
# partition key — must be inserted first into meta.columns for CQL export ordering
26792692
if len(partition_rows) > 1:
2680-
partition_rows = sorted(partition_rows, key=lambda row: row.get('position'))
2693+
partition_rows.sort(key=lambda row: row.get('position'))
26812694
for r in partition_rows:
2682-
# we have to add meta here (and not in the later loop) because TableMetadata.columns
2683-
# assumes keys are inserted first, in order, when exporting CQL
26842695
column_meta = self._build_column_metadata(meta, r)
26852696
meta.columns[column_meta.name] = column_meta
2686-
meta.partition_key.append(meta.columns[r.get('column_name')])
2697+
meta.partition_key.append(column_meta)
26872698

26882699
# clustering key
2689-
if not compact_static:
2690-
clustering_rows = [r for r in col_rows
2691-
if r.get('kind', None) == "clustering"]
2700+
if clustering_rows:
26922701
if len(clustering_rows) > 1:
2693-
clustering_rows = sorted(clustering_rows, key=lambda row: row.get('position'))
2702+
clustering_rows.sort(key=lambda row: row.get('position'))
26942703
for r in clustering_rows:
26952704
column_meta = self._build_column_metadata(meta, r)
26962705
meta.columns[column_meta.name] = column_meta
2697-
meta.clustering_key.append(meta.columns[r.get('column_name')])
2706+
meta.clustering_key.append(column_meta)
26982707

2699-
for col_row in (r for r in col_rows
2700-
if r.get('kind', None) not in ('partition_key', 'clustering_key')):
2708+
# remaining columns (static, regular, etc.)
2709+
for col_row in other_rows:
27012710
column_meta = self._build_column_metadata(meta, col_row)
27022711
if is_dense and column_meta.cql_type == types.cql_empty_type:
27032712
continue

0 commit comments

Comments
 (0)