Skip to content

Commit 9243f4c

Browse files
authored
feat(cubesql): add pg_catalog.pg_collation table (#9968)
1 parent 020e455 commit 9243f4c

5 files changed

Lines changed: 310 additions & 3 deletions

File tree

rust/cubesql/cubesql/src/compile/engine/context_postgresql.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ use super::information_schema::postgres::{
1919
InfoSchemaTestingBlockingProvider, InfoSchemaTestingDatasetProvider, PgCatalogAmProvider,
2020
PgCatalogAttrdefProvider, PgCatalogAttributeProvider, PgCatalogAuthMembersProvider,
2121
PgCatalogAvailableExtensionVersionsProvider, PgCatalogCastProvider, PgCatalogClassProvider,
22-
PgCatalogConstraintProvider, PgCatalogDatabaseProvider, PgCatalogDependProvider,
23-
PgCatalogDescriptionProvider, PgCatalogEnumProvider, PgCatalogEventTriggerProvider,
24-
PgCatalogExtensionProvider, PgCatalogForeignDataWrapperProvider,
22+
PgCatalogCollationProvider, PgCatalogConstraintProvider, PgCatalogDatabaseProvider,
23+
PgCatalogDependProvider, PgCatalogDescriptionProvider, PgCatalogEnumProvider,
24+
PgCatalogEventTriggerProvider, PgCatalogExtensionProvider, PgCatalogForeignDataWrapperProvider,
2525
PgCatalogForeignServerProvider, PgCatalogForeignTableProvider, PgCatalogIndexProvider,
2626
PgCatalogInheritsProvider, PgCatalogLanguageProvider, PgCatalogLocksProvider,
2727
PgCatalogMatviewsProvider, PgCatalogNamespaceProvider, PgCatalogOperatorProvider,
@@ -99,6 +99,8 @@ impl DatabaseProtocol {
9999
"pg_catalog.pg_index".to_string()
100100
} else if let Some(_) = any.downcast_ref::<PgCatalogClassProvider>() {
101101
"pg_catalog.pg_class".to_string()
102+
} else if let Some(_) = any.downcast_ref::<PgCatalogCollationProvider>() {
103+
"pg_catalog.pg_collation".to_string()
102104
} else if let Some(_) = any.downcast_ref::<PgCatalogProcProvider>() {
103105
"pg_catalog.pg_proc".to_string()
104106
} else if let Some(_) = any.downcast_ref::<PgCatalogSettingsProvider>() {
@@ -377,6 +379,7 @@ impl DatabaseProtocol {
377379
"pg_class" => {
378380
return Some(Arc::new(PgCatalogClassProvider::new(&context.meta.tables)))
379381
}
382+
"pg_collation" => return Some(Arc::new(PgCatalogCollationProvider::new())),
380383
"pg_proc" => return Some(Arc::new(PgCatalogProcProvider::new())),
381384
"pg_settings" => {
382385
return Some(Arc::new(PgCatalogSettingsProvider::new(

rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ mod pg_auth_members;
2020
mod pg_available_extension_versions;
2121
mod pg_cast;
2222
mod pg_class;
23+
mod pg_collation;
2324
mod pg_constraint;
2425
mod pg_database;
2526
mod pg_depend;
@@ -71,6 +72,7 @@ pub use pg_auth_members::*;
7172
pub use pg_available_extension_versions::*;
7273
pub use pg_cast::*;
7374
pub use pg_class::*;
75+
pub use pg_collation::*;
7476
pub use pg_constraint::*;
7577
pub use pg_database::*;
7678
pub use pg_depend::*;
Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
use std::sync::Arc;
2+
3+
use async_trait::async_trait;
4+
5+
use datafusion::{
6+
arrow::{
7+
array::{Array, ArrayRef, BooleanBuilder, Int32Builder, StringBuilder, UInt32Builder},
8+
datatypes::{DataType, Field, Schema},
9+
record_batch::RecordBatch,
10+
},
11+
datasource::{datasource::TableProviderFilterPushDown, TableProvider},
12+
error::Result,
13+
logical_plan::Expr,
14+
physical_plan::{memory::MemoryExec, ExecutionPlan},
15+
};
16+
17+
use crate::compile::engine::information_schema::postgres::PG_NAMESPACE_CATALOG_OID;
18+
19+
/// Plain description of a single `pg_catalog.pg_collation` row.
///
/// Values of this type are handed to `PgCatalogCollationBuilder::add_collation`,
/// which copies each field into the matching column builder.
struct PgCollation {
    /// Object identifier of the collation.
    oid: u32,
    /// Collation name.
    collname: &'static str,
    /// OID of the namespace that contains the collation.
    collnamespace: u32,
    /// OID of the collation owner.
    collowner: u32,
    /// Single-letter collation provider code (rows in this file use `d`, `c`, `b` and `i`).
    collprovider: String,
    /// Whether the collation is deterministic.
    collisdeterministic: bool,
    /// Encoding the collation applies to; rows in this file use `-1` or `6`.
    collencoding: i32,
    /// `LC_COLLATE` value, when the row has one.
    collcollate: Option<String>,
    /// `LC_CTYPE` value, when the row has one.
    collctype: Option<String>,
    // Column `colliculocale` is renamed to `colllocale` since PostgreSQL 17.
    /// Provider locale name; also emitted under the legacy `colliculocale` column.
    colllocale: Option<String>,
    /// ICU collation rules, when the row has any.
    collicurules: Option<String>,
    /// Provider-specific collation version, when the row has one.
    collversion: Option<String>,
}
34+
35+
struct PgCatalogCollationBuilder {
36+
oid: UInt32Builder,
37+
collname: StringBuilder,
38+
collnamespace: UInt32Builder,
39+
collowner: UInt32Builder,
40+
collprovider: StringBuilder,
41+
collisdeterministic: BooleanBuilder,
42+
collencoding: Int32Builder,
43+
collcollate: StringBuilder,
44+
collctype: StringBuilder,
45+
// Column `colliculocale` is renamed to `colllocale` since PostgreSQL 17.
46+
// Support both columns for backward-compatibility.
47+
// Reference: https://pgpedia.info/p/pg_collation.html
48+
colllocale: StringBuilder,
49+
colliculocale: StringBuilder,
50+
collicurules: StringBuilder,
51+
collversion: StringBuilder,
52+
}
53+
54+
impl PgCatalogCollationBuilder {
55+
fn new(capacity: usize) -> Self {
56+
Self {
57+
oid: UInt32Builder::new(capacity),
58+
collname: StringBuilder::new(capacity),
59+
collnamespace: UInt32Builder::new(capacity),
60+
collowner: UInt32Builder::new(capacity),
61+
collprovider: StringBuilder::new(capacity),
62+
collisdeterministic: BooleanBuilder::new(capacity),
63+
collencoding: Int32Builder::new(capacity),
64+
collcollate: StringBuilder::new(capacity),
65+
collctype: StringBuilder::new(capacity),
66+
colllocale: StringBuilder::new(capacity),
67+
colliculocale: StringBuilder::new(capacity),
68+
collicurules: StringBuilder::new(capacity),
69+
collversion: StringBuilder::new(capacity),
70+
}
71+
}
72+
fn add_collation(&mut self, coll: &PgCollation) {
73+
self.oid.append_value(coll.oid).unwrap();
74+
self.collname.append_value(coll.collname).unwrap();
75+
self.collnamespace.append_value(coll.collnamespace).unwrap();
76+
self.collowner.append_value(coll.collowner).unwrap();
77+
self.collprovider
78+
.append_value(coll.collprovider.clone())
79+
.unwrap();
80+
self.collisdeterministic
81+
.append_value(coll.collisdeterministic)
82+
.unwrap();
83+
self.collencoding.append_value(coll.collencoding).unwrap();
84+
self.collcollate
85+
.append_option(coll.collcollate.clone())
86+
.unwrap();
87+
self.collctype
88+
.append_option(coll.collctype.clone())
89+
.unwrap();
90+
self.colllocale
91+
.append_option(coll.colllocale.clone())
92+
.unwrap();
93+
// Column `colliculocale` is renamed to `colllocale` since PostgreSQL 17.
94+
self.colliculocale
95+
.append_option(coll.colllocale.clone())
96+
.unwrap();
97+
self.collicurules
98+
.append_option(coll.collicurules.clone())
99+
.unwrap();
100+
self.collversion
101+
.append_option(coll.collversion.clone())
102+
.unwrap();
103+
}
104+
105+
fn finish(mut self) -> Vec<Arc<dyn Array>> {
106+
let columns: Vec<Arc<dyn Array>> = vec![
107+
Arc::new(self.oid.finish()),
108+
Arc::new(self.collname.finish()),
109+
Arc::new(self.collnamespace.finish()),
110+
Arc::new(self.collowner.finish()),
111+
Arc::new(self.collprovider.finish()),
112+
Arc::new(self.collisdeterministic.finish()),
113+
Arc::new(self.collencoding.finish()),
114+
Arc::new(self.collcollate.finish()),
115+
Arc::new(self.collctype.finish()),
116+
Arc::new(self.colllocale.finish()),
117+
Arc::new(self.colliculocale.finish()),
118+
Arc::new(self.collicurules.finish()),
119+
Arc::new(self.collversion.finish()),
120+
];
121+
columns
122+
}
123+
}
124+
125+
pub struct PgCatalogCollationProvider {
126+
data: Arc<Vec<ArrayRef>>,
127+
}
128+
129+
impl PgCatalogCollationProvider {
130+
pub fn new() -> Self {
131+
// See https://github.com/postgres/postgres/blob/REL_17_6/src/include/catalog/pg_collation.h
132+
let mut builder = PgCatalogCollationBuilder::new(6);
133+
134+
// Initial contents of the pg_collation system catalog.
135+
// See https://github.com/postgres/postgres/blob/REL_17_6/src/include/catalog/pg_collation.dat
136+
137+
// database's default collation
138+
builder.add_collation(&PgCollation {
139+
oid: 100,
140+
collname: "default",
141+
collnamespace: PG_NAMESPACE_CATALOG_OID,
142+
collowner: 10,
143+
collprovider: "d".to_string(),
144+
collisdeterministic: true,
145+
collencoding: -1,
146+
collcollate: None,
147+
collctype: None,
148+
colllocale: None,
149+
collicurules: None,
150+
collversion: None,
151+
});
152+
// standard C collation
153+
builder.add_collation(&PgCollation {
154+
oid: 950,
155+
collname: "C",
156+
collnamespace: PG_NAMESPACE_CATALOG_OID,
157+
collowner: 10,
158+
collprovider: "c".to_string(),
159+
collisdeterministic: true,
160+
collencoding: -1,
161+
collcollate: Some("C".to_string()),
162+
collctype: Some("C".to_string()),
163+
colllocale: None,
164+
collicurules: None,
165+
collversion: None,
166+
});
167+
// standard POSIX collation
168+
builder.add_collation(&PgCollation {
169+
oid: 951,
170+
collname: "POSIX",
171+
collnamespace: PG_NAMESPACE_CATALOG_OID,
172+
collowner: 10,
173+
collprovider: "c".to_string(),
174+
collisdeterministic: true,
175+
collencoding: -1,
176+
collcollate: Some("POSIX".to_string()),
177+
collctype: Some("POSIX".to_string()),
178+
colllocale: None,
179+
collicurules: None,
180+
collversion: None,
181+
});
182+
// sorts by Unicode code point, C character semantics
183+
builder.add_collation(&PgCollation {
184+
oid: 962,
185+
collname: "ucs_basic",
186+
collnamespace: PG_NAMESPACE_CATALOG_OID,
187+
collowner: 10,
188+
collprovider: "b".to_string(),
189+
collisdeterministic: true,
190+
collencoding: 6,
191+
collcollate: None,
192+
collctype: None,
193+
colllocale: Some("C".to_string()),
194+
collicurules: None,
195+
collversion: Some("1".to_string()),
196+
});
197+
// sorts using the Unicode Collation Algorithm with default settings
198+
builder.add_collation(&PgCollation {
199+
oid: 963,
200+
collname: "unicode",
201+
collnamespace: PG_NAMESPACE_CATALOG_OID,
202+
collowner: 10,
203+
collprovider: "i".to_string(),
204+
collisdeterministic: true,
205+
collencoding: -1,
206+
collcollate: None,
207+
collctype: None,
208+
colllocale: Some("und".to_string()),
209+
collicurules: None,
210+
collversion: Some("153.128".to_string()),
211+
});
212+
// sorts by Unicode code point; Unicode and POSIX character semantics
213+
builder.add_collation(&PgCollation {
214+
oid: 811,
215+
collname: "pg_c_utf8",
216+
collnamespace: PG_NAMESPACE_CATALOG_OID,
217+
collowner: 10,
218+
collprovider: "b".to_string(),
219+
collisdeterministic: true,
220+
collencoding: 6,
221+
collcollate: None,
222+
collctype: None,
223+
colllocale: Some("C.UTF-8".to_string()),
224+
collicurules: None,
225+
collversion: Some("1".to_string()),
226+
});
227+
Self {
228+
data: Arc::new(builder.finish()),
229+
}
230+
}
231+
}
232+
233+
#[async_trait]
234+
impl TableProvider for PgCatalogCollationProvider {
235+
fn as_any(&self) -> &dyn std::any::Any {
236+
self
237+
}
238+
fn schema(&self) -> datafusion::arrow::datatypes::SchemaRef {
239+
Arc::new(Schema::new(vec![
240+
Field::new("oid", DataType::UInt32, false),
241+
Field::new("collname", DataType::Utf8, false),
242+
Field::new("collnamespace", DataType::UInt32, false),
243+
Field::new("collowner", DataType::UInt32, false),
244+
Field::new("collprovider", DataType::Utf8, false),
245+
Field::new("collisdeterministic", DataType::Boolean, false),
246+
Field::new("collencoding", DataType::Int32, false),
247+
Field::new("collcollate", DataType::Utf8, true),
248+
Field::new("collctype", DataType::Utf8, true),
249+
Field::new("colllocale", DataType::Utf8, true),
250+
Field::new("colliculocale", DataType::Utf8, true),
251+
Field::new("collicurules", DataType::Utf8, true),
252+
Field::new("collversion", DataType::Utf8, true),
253+
]))
254+
}
255+
async fn scan(
256+
&self,
257+
projection: &Option<Vec<usize>>,
258+
_filters: &[Expr],
259+
// limit can be used to reduce the amount scanned
260+
// from the datasource as a performance optimization.
261+
// If set, it contains the amount of rows needed by the `LogicalPlan`,
262+
// The datasource should return *at least* this number of rows if available.
263+
_limit: Option<usize>,
264+
) -> Result<Arc<dyn ExecutionPlan>> {
265+
let batch = RecordBatch::try_new(self.schema(), self.data.to_vec())?;
266+
Ok(Arc::new(MemoryExec::try_new(
267+
&[vec![batch]],
268+
self.schema(),
269+
projection.clone(),
270+
)?))
271+
}
272+
fn supports_filter_pushdown(&self, _filter: &Expr) -> Result<TableProviderFilterPushDown> {
273+
Ok(TableProviderFilterPushDown::Unsupported)
274+
}
275+
}

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17698,4 +17698,17 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
1769817698
}
1769917699
)
1770017700
}
17701+
17702+
/// Snapshot test for `pg_catalog.pg_collation`.
///
/// Runs `SELECT *` ordered by `oid` over the PostgreSQL protocol and compares
/// the result with the stored `pg_collation_PG17` snapshot.
#[tokio::test]
async fn test_pg_collation() -> Result<(), CubeError> {
    insta::assert_snapshot!(
        "pg_collation_PG17",
        execute_query(
            "SELECT * FROM pg_catalog.pg_collation ORDER BY oid".to_string(),
            DatabaseProtocol::PostgreSQL
        )
        .await?
    );
    Ok(())
}
1770117714
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
---
2+
source: cubesql/src/compile/mod.rs
3+
expression: "execute_query(\"SELECT * FROM pg_catalog.pg_collation ORDER BY oid\".to_string(),\nDatabaseProtocol::PostgreSQL).await?"
4+
---
5+
+-----+-----------+---------------+-----------+--------------+---------------------+--------------+-------------+-----------+------------+---------------+--------------+-------------+
6+
| oid | collname | collnamespace | collowner | collprovider | collisdeterministic | collencoding | collcollate | collctype | colllocale | colliculocale | collicurules | collversion |
7+
+-----+-----------+---------------+-----------+--------------+---------------------+--------------+-------------+-----------+------------+---------------+--------------+-------------+
8+
| 100 | default | 11 | 10 | d | true | -1 | NULL | NULL | NULL | NULL | NULL | NULL |
9+
| 811 | pg_c_utf8 | 11 | 10 | b | true | 6 | NULL | NULL | C.UTF-8 | C.UTF-8 | NULL | 1 |
10+
| 950 | C | 11 | 10 | c | true | -1 | C | C | NULL | NULL | NULL | NULL |
11+
| 951 | POSIX | 11 | 10 | c | true | -1 | POSIX | POSIX | NULL | NULL | NULL | NULL |
12+
| 962 | ucs_basic | 11 | 10 | b | true | 6 | NULL | NULL | C | C | NULL | 1 |
13+
| 963 | unicode | 11 | 10 | i | true | -1 | NULL | NULL | und | und | NULL | 153.128 |
14+
+-----+-----------+---------------+-----------+--------------+---------------------+--------------+-------------+-----------+------------+---------------+--------------+-------------+

0 commit comments

Comments
 (0)