diff --git a/pom.xml b/pom.xml index 7003583..e8bda24 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ database-audits-core - 2.0.0-SNAPSHOT + 2.0.1-SNAPSHOT jar Database Audits Core @@ -39,7 +39,7 @@ scm:git:https://github.com/database-audits/core.git scm:git:https://github.com/database-audits/core.git https://github.com/database-audits/core - HEAD + v2.0.0 diff --git a/src/main/java/io/github/databaseaudits/audit/catalog/ForeignKeyIndexAudit.java b/src/main/java/io/github/databaseaudits/audit/catalog/ForeignKeyIndexAudit.java index 84c4779..a39f97a 100644 --- a/src/main/java/io/github/databaseaudits/audit/catalog/ForeignKeyIndexAudit.java +++ b/src/main/java/io/github/databaseaudits/audit/catalog/ForeignKeyIndexAudit.java @@ -45,67 +45,8 @@ record ForeignKey(String tableName, String constraintName, String referencedTable, List columns) { } - private static final String POSTGRESQL_FK_SQL = - """ - SELECT cl.relname AS table_name, - c.conname AS constraint_name, - ref.relname AS referenced_table, - a.attname AS column_name - FROM pg_constraint c - JOIN pg_class cl ON cl.oid = c.conrelid - JOIN pg_class ref ON ref.oid = c.confrelid - CROSS JOIN LATERAL unnest(c.conkey) WITH ORDINALITY AS k(attnum, ordinal) - JOIN pg_attribute a ON a.attrelid = c.conrelid AND a.attnum = k.attnum - WHERE c.contype = 'f' - AND c.connamespace = ?::regnamespace - ORDER BY 1, 2, k.ordinal - """; - - /** - * key_column_usage carries the referenced table directly on MySQL/MariaDB. - */ - private static final String MYSQL_FK_SQL = """ - SELECT k.table_name AS table_name, - k.constraint_name AS constraint_name, - k.referenced_table_name AS referenced_table, - k.column_name AS column_name - FROM information_schema.key_column_usage k - WHERE k.table_schema = ? - AND k.referenced_table_name IS NOT NULL - ORDER BY 1, 2, k.ordinal_position - """; - - /** - * Standard information_schema; constraint names are unique per schema on - * H2, so the joins are exact. 
- */ - private static final String H2_FK_SQL = """ - SELECT tc.table_name AS table_name, - tc.constraint_name AS constraint_name, - ref_tc.table_name AS referenced_table, - kcu.column_name AS column_name - FROM information_schema.table_constraints tc - JOIN information_schema.key_column_usage kcu - ON kcu.constraint_schema = tc.constraint_schema - AND kcu.constraint_name = tc.constraint_name - AND kcu.table_name = tc.table_name - JOIN information_schema.referential_constraints rc - ON rc.constraint_schema = tc.constraint_schema - AND rc.constraint_name = tc.constraint_name - LEFT JOIN information_schema.table_constraints ref_tc - ON ref_tc.constraint_schema = rc.unique_constraint_schema - AND ref_tc.constraint_name = rc.unique_constraint_name - WHERE tc.constraint_type = 'FOREIGN KEY' - AND tc.table_schema = ? - ORDER BY 1, 2, kcu.ordinal_position - """; - String sql() { - return switch (platform) { - case POSTGRESQL -> POSTGRESQL_FK_SQL; - case MYSQL, MARIADB -> MYSQL_FK_SQL; - case H2 -> H2_FK_SQL; - }; + return platform.catalogDialect().foreignKeysSql(); } /** diff --git a/src/main/java/io/github/databaseaudits/audit/catalog/ForeignKeyNotNullAudit.java b/src/main/java/io/github/databaseaudits/audit/catalog/ForeignKeyNotNullAudit.java index 523d878..dad2b3a 100644 --- a/src/main/java/io/github/databaseaudits/audit/catalog/ForeignKeyNotNullAudit.java +++ b/src/main/java/io/github/databaseaudits/audit/catalog/ForeignKeyNotNullAudit.java @@ -28,35 +28,8 @@ public class ForeignKeyNotNullAudit { private final CatalogQueries catalogQueries; private final DatabasePlatform platform; - /** - * Standard information_schema, valid as-is on PostgreSQL, MySQL, MariaDB, - * and H2. The join includes {@code table_name} because constraint names are - * only unique per table on PostgreSQL and MySQL. 
- */ - private static final String INFORMATION_SCHEMA_NULLABLE_FK_COLUMN_SQL = """ - SELECT kcu.table_name AS table_name, - kcu.constraint_name AS constraint_name, - kcu.column_name AS column_name - FROM information_schema.table_constraints tc - JOIN information_schema.key_column_usage kcu - ON kcu.constraint_schema = tc.constraint_schema - AND kcu.constraint_name = tc.constraint_name - AND kcu.table_name = tc.table_name - JOIN information_schema.columns col - ON col.table_schema = kcu.table_schema - AND col.table_name = kcu.table_name - AND col.column_name = kcu.column_name - WHERE tc.constraint_type = 'FOREIGN KEY' - AND tc.table_schema = ? - AND col.is_nullable = 'YES' - ORDER BY 1, 2, 3 - """; - String sql() { - return switch (platform) { - case POSTGRESQL, MYSQL, MARIADB, H2 -> - INFORMATION_SCHEMA_NULLABLE_FK_COLUMN_SQL; - }; + return platform.catalogDialect().nullableForeignKeyColumnSql(); } /** diff --git a/src/main/java/io/github/databaseaudits/audit/catalog/ForeignKeyTypeMatchAudit.java b/src/main/java/io/github/databaseaudits/audit/catalog/ForeignKeyTypeMatchAudit.java index d2b9e3a..4b647d9 100644 --- a/src/main/java/io/github/databaseaudits/audit/catalog/ForeignKeyTypeMatchAudit.java +++ b/src/main/java/io/github/databaseaudits/audit/catalog/ForeignKeyTypeMatchAudit.java @@ -33,107 +33,8 @@ public class ForeignKeyTypeMatchAudit { private final CatalogQueries catalogQueries; private final DatabasePlatform platform; - /** - * pg_catalog pairs each FK column with its referenced column positionally - * via {@code conkey}/{@code confkey}; {@code format_type} renders the full - * declared type (with modifiers, e.g. {@code character varying(10)}). 
- */ - private static final String POSTGRESQL_FK_COLUMN_TYPES_SQL = - """ - SELECT cl.relname AS table_name, - c.conname AS constraint_name, - a.attname AS column_name, - format_type(a.atttypid, a.atttypmod) AS column_type, - ref.relname AS referenced_table, - ra.attname AS referenced_column, - format_type(ra.atttypid, ra.atttypmod) AS referenced_type - FROM pg_constraint c - JOIN pg_class cl ON cl.oid = c.conrelid - JOIN pg_class ref ON ref.oid = c.confrelid - CROSS JOIN LATERAL unnest(c.conkey, c.confkey) - WITH ORDINALITY AS k(attnum, refattnum, ordinal) - JOIN pg_attribute a ON a.attrelid = c.conrelid AND a.attnum = k.attnum - JOIN pg_attribute ra ON ra.attrelid = c.confrelid AND ra.attnum = k.refattnum - WHERE c.contype = 'f' - AND c.connamespace = ?::regnamespace - ORDER BY 1, 2, k.ordinal - """; - - /** - * key_column_usage carries the referenced table and column directly on - * MySQL/MariaDB; {@code column_type} is the full declared type including - * length and signedness (e.g. {@code varchar(10)}, - * {@code bigint unsigned}). - */ - private static final String MYSQL_FK_COLUMN_TYPES_SQL = """ - SELECT k.table_name AS table_name, - k.constraint_name AS constraint_name, - k.column_name AS column_name, - col.column_type AS column_type, - k.referenced_table_name AS referenced_table, - k.referenced_column_name AS referenced_column, - rcol.column_type AS referenced_type - FROM information_schema.key_column_usage k - JOIN information_schema.columns col - ON col.table_schema = k.table_schema - AND col.table_name = k.table_name - AND col.column_name = k.column_name - JOIN information_schema.columns rcol - ON rcol.table_schema = k.referenced_table_schema - AND rcol.table_name = k.referenced_table_name - AND rcol.column_name = k.referenced_column_name - WHERE k.table_schema = ? 
- AND k.referenced_table_name IS NOT NULL - ORDER BY 1, 2, k.ordinal_position - """; - - /** - * Standard information_schema: {@code position_in_unique_constraint} maps - * each FK column to the referenced unique/PK constraint's column at that - * position. The declared type is composed from {@code data_type} plus the - * character length when present ({@code '(' || NULL || ')'} concatenates to - * NULL, so COALESCE drops it). - */ - private static final String H2_FK_COLUMN_TYPES_SQL = - """ - SELECT tc.table_name AS table_name, - tc.constraint_name AS constraint_name, - kcu.column_name AS column_name, - col.data_type || COALESCE('(' || col.character_maximum_length || ')', '') AS column_type, - ref_kcu.table_name AS referenced_table, - ref_kcu.column_name AS referenced_column, - rcol.data_type || COALESCE('(' || rcol.character_maximum_length || ')', '') AS referenced_type - FROM information_schema.table_constraints tc - JOIN information_schema.key_column_usage kcu - ON kcu.constraint_schema = tc.constraint_schema - AND kcu.constraint_name = tc.constraint_name - AND kcu.table_name = tc.table_name - JOIN information_schema.referential_constraints rc - ON rc.constraint_schema = tc.constraint_schema - AND rc.constraint_name = tc.constraint_name - JOIN information_schema.key_column_usage ref_kcu - ON ref_kcu.constraint_schema = rc.unique_constraint_schema - AND ref_kcu.constraint_name = rc.unique_constraint_name - AND ref_kcu.ordinal_position = kcu.position_in_unique_constraint - JOIN information_schema.columns col - ON col.table_schema = kcu.table_schema - AND col.table_name = kcu.table_name - AND col.column_name = kcu.column_name - JOIN information_schema.columns rcol - ON rcol.table_schema = ref_kcu.table_schema - AND rcol.table_name = ref_kcu.table_name - AND rcol.column_name = ref_kcu.column_name - WHERE tc.constraint_type = 'FOREIGN KEY' - AND tc.table_schema = ? 
- ORDER BY 1, 2, kcu.ordinal_position - """; - String sql() { - return switch (platform) { - case POSTGRESQL -> POSTGRESQL_FK_COLUMN_TYPES_SQL; - case MYSQL, MARIADB -> MYSQL_FK_COLUMN_TYPES_SQL; - case H2 -> H2_FK_COLUMN_TYPES_SQL; - }; + return platform.catalogDialect().foreignKeyColumnTypesSql(); } /** diff --git a/src/main/java/io/github/databaseaudits/audit/catalog/PrimaryKeyPresenceAudit.java b/src/main/java/io/github/databaseaudits/audit/catalog/PrimaryKeyPresenceAudit.java index dc026cb..c90f2f4 100644 --- a/src/main/java/io/github/databaseaudits/audit/catalog/PrimaryKeyPresenceAudit.java +++ b/src/main/java/io/github/databaseaudits/audit/catalog/PrimaryKeyPresenceAudit.java @@ -34,30 +34,8 @@ public class PrimaryKeyPresenceAudit { private final CatalogQueries catalogQueries; private final DatabasePlatform platform; - /** - * Standard information_schema, valid as-is on PostgreSQL, MySQL, MariaDB, - * and H2. - */ - private static final String INFORMATION_SCHEMA_TABLES_WITHOUT_PK_SQL = """ - SELECT t.table_name - FROM information_schema.tables t - WHERE t.table_schema = ? 
- AND t.table_type = 'BASE TABLE' - AND NOT EXISTS ( - SELECT 1 - FROM information_schema.table_constraints tc - WHERE tc.table_schema = t.table_schema - AND tc.table_name = t.table_name - AND tc.constraint_type = 'PRIMARY KEY' - ) - ORDER BY t.table_name - """; - String sql() { - return switch (platform) { - case POSTGRESQL, MYSQL, MARIADB, H2 -> - INFORMATION_SCHEMA_TABLES_WITHOUT_PK_SQL; - }; + return platform.catalogDialect().tablesWithoutPrimaryKeySql(); } /** diff --git a/src/main/java/io/github/databaseaudits/audit/runtime/plan/CapturedSqlPlanAuditTemplate.java b/src/main/java/io/github/databaseaudits/audit/runtime/plan/CapturedSqlPlanAuditTemplate.java index b7dded2..3518d48 100644 --- a/src/main/java/io/github/databaseaudits/audit/runtime/plan/CapturedSqlPlanAuditTemplate.java +++ b/src/main/java/io/github/databaseaudits/audit/runtime/plan/CapturedSqlPlanAuditTemplate.java @@ -184,7 +184,7 @@ private List findingsOf(final TreeMap violations) { */ protected final void collectChildFindings(final JsonNode node, final List findings, final Set excludedRelations) { - final JsonNode planNodes = node.get("Plans"); + final JsonNode planNodes = node.get(PlanJson.PLANS); if (planNodes != null) { for (final JsonNode planNode : planNodes) { collectFindings(planNode, findings, excludedRelations); @@ -205,11 +205,11 @@ protected final void collectChildFindings(final JsonNode node, return null; } final String relation = - queryPlanExplainer.textOf(node, "Relation Name"); + queryPlanExplainer.textOf(node, PlanJson.RELATION_NAME); if (relation != null) { return relation; } - final JsonNode planNodes = node.get("Plans"); + final JsonNode planNodes = node.get(PlanJson.PLANS); if (planNodes != null) { for (final JsonNode planNode : planNodes) { final String found = firstRelationName(planNode); diff --git a/src/main/java/io/github/databaseaudits/audit/runtime/plan/JoinIndexAudit.java b/src/main/java/io/github/databaseaudits/audit/runtime/plan/JoinIndexAudit.java index 
fce8cd9..a3f8ec1 100644 --- a/src/main/java/io/github/databaseaudits/audit/runtime/plan/JoinIndexAudit.java +++ b/src/main/java/io/github/databaseaudits/audit/runtime/plan/JoinIndexAudit.java @@ -81,8 +81,9 @@ protected void collectFindings(final JsonNode node, private void addSurvivingHashOrMergeJoin(final JsonNode node, final List findings, final Set excludedRelations) { - final String type = queryPlanExplainer.textOf(node, "Node Type"); - if ("Hash Join".equals(type) || "Merge Join".equals(type)) { + final String type = queryPlanExplainer.textOf(node, PlanJson.NODE_TYPE); + if (PlanJson.HASH_JOIN.equals(type) + || PlanJson.MERGE_JOIN.equals(type)) { final String relation = firstRelationName(innerChildOf(node)); addJoinFinding(type, relation, joinConditionOf(node, null), findings, excludedRelations); @@ -91,13 +92,13 @@ private void addSurvivingHashOrMergeJoin(final JsonNode node, private void addNestedLoopWithInnerSeqScan(final JsonNode node, final List findings, final Set excludedRelations) { - if ("Nested Loop" - .equals(queryPlanExplainer.textOf(node, "Node Type"))) { + if (PlanJson.NESTED_LOOP + .equals(queryPlanExplainer.textOf(node, PlanJson.NODE_TYPE))) { final JsonNode innerScan = unwrapPassThrough(innerChildOf(node)); - if (innerScan != null && "Seq Scan".equals( - queryPlanExplainer.textOf(innerScan, "Node Type"))) { - final String relation = - queryPlanExplainer.textOf(innerScan, "Relation Name"); + if (innerScan != null && PlanJson.SEQ_SCAN.equals( + queryPlanExplainer.textOf(innerScan, PlanJson.NODE_TYPE))) { + final String relation = queryPlanExplainer.textOf(innerScan, + PlanJson.RELATION_NAME); addJoinFinding("Nested Loop with inner Seq Scan", relation, joinConditionOf(node, innerScan), findings, excludedRelations); @@ -119,22 +120,22 @@ private void addJoinFinding(final String description, final String relation, private String joinConditionOf(final JsonNode joinNode, final JsonNode innerScan) { final String hashCond = - 
queryPlanExplainer.textOf(joinNode, "Hash Cond"); + queryPlanExplainer.textOf(joinNode, PlanJson.HASH_COND); if (hashCond != null) { return hashCond; } final String mergeCond = - queryPlanExplainer.textOf(joinNode, "Merge Cond"); + queryPlanExplainer.textOf(joinNode, PlanJson.MERGE_COND); if (mergeCond != null) { return mergeCond; } final String joinFilter = - queryPlanExplainer.textOf(joinNode, "Join Filter"); + queryPlanExplainer.textOf(joinNode, PlanJson.JOIN_FILTER); if (joinFilter != null) { return joinFilter; } final String innerFilter = innerScan == null ? null - : queryPlanExplainer.textOf(innerScan, "Filter"); + : queryPlanExplainer.textOf(innerScan, PlanJson.FILTER); return innerFilter == null ? "(join condition not shown)" : innerFilter; } @@ -143,13 +144,13 @@ private String joinConditionOf(final JsonNode joinNode, * have to serve. */ private JsonNode innerChildOf(final JsonNode node) { - final JsonNode planNodes = node.get("Plans"); + final JsonNode planNodes = node.get(PlanJson.PLANS); if (planNodes == null) { return null; } for (final JsonNode planNode : planNodes) { - if ("Inner".equals(queryPlanExplainer.textOf(planNode, - "Parent Relationship"))) { + if (PlanJson.INNER.equals(queryPlanExplainer.textOf(planNode, + PlanJson.PARENT_RELATIONSHIP))) { return planNode; } } @@ -163,8 +164,8 @@ private JsonNode innerChildOf(final JsonNode node) { private JsonNode unwrapPassThrough(final JsonNode node) { JsonNode current = node; while (current != null && isPassThrough( - queryPlanExplainer.textOf(current, "Node Type"))) { - final JsonNode planNodes = current.get("Plans"); + queryPlanExplainer.textOf(current, PlanJson.NODE_TYPE))) { + final JsonNode planNodes = current.get(PlanJson.PLANS); current = planNodes != null && !planNodes.isEmpty() ? 
planNodes.get(0) : null; @@ -173,9 +174,10 @@ private JsonNode unwrapPassThrough(final JsonNode node) { } private boolean isPassThrough(final String nodeType) { - return "Hash".equals(nodeType) || "Sort".equals(nodeType) - || "Incremental Sort".equals(nodeType) - || "Materialize".equals(nodeType) || "Memoize".equals(nodeType); + return PlanJson.HASH.equals(nodeType) || PlanJson.SORT.equals(nodeType) + || PlanJson.INCREMENTAL_SORT.equals(nodeType) + || PlanJson.MATERIALIZE.equals(nodeType) + || PlanJson.MEMOIZE.equals(nodeType); } @Override diff --git a/src/main/java/io/github/databaseaudits/audit/runtime/plan/OrderByIndexAudit.java b/src/main/java/io/github/databaseaudits/audit/runtime/plan/OrderByIndexAudit.java index 03ae5f6..daf4079 100644 --- a/src/main/java/io/github/databaseaudits/audit/runtime/plan/OrderByIndexAudit.java +++ b/src/main/java/io/github/databaseaudits/audit/runtime/plan/OrderByIndexAudit.java @@ -71,8 +71,9 @@ protected void collectFindings(final JsonNode node, private void addUnindexedSort(final JsonNode node, final List findings, final Set excludedRelations) { - final String type = queryPlanExplainer.textOf(node, "Node Type"); - if ("Sort".equals(type) || "Incremental Sort".equals(type)) { + final String type = queryPlanExplainer.textOf(node, PlanJson.NODE_TYPE); + if (PlanJson.SORT.equals(type) + || PlanJson.INCREMENTAL_SORT.equals(type)) { final String relation = firstRelationName(node); if (relation == null || !excludedRelations.contains(relation)) { final String onRelation = @@ -83,7 +84,7 @@ private void addUnindexedSort(final JsonNode node, } private String sortKeyOf(final JsonNode sortNode) { - final JsonNode key = sortNode.get("Sort Key"); + final JsonNode key = sortNode.get(PlanJson.SORT_KEY); if (key == null || !key.isArray()) { return "(unknown key)"; } diff --git a/src/main/java/io/github/databaseaudits/audit/runtime/plan/PlanJson.java b/src/main/java/io/github/databaseaudits/audit/runtime/plan/PlanJson.java new file mode 100644 
index 0000000..2681bc7 --- /dev/null +++ b/src/main/java/io/github/databaseaudits/audit/runtime/plan/PlanJson.java @@ -0,0 +1,42 @@ +package io.github.databaseaudits.audit.runtime.plan; + +/** + * The PostgreSQL {@code EXPLAIN (FORMAT JSON)} field names and {@code Node Type} + * values the plan-based audits match on, centralized so the production detection + * logic carries no repeated magic strings. They mirror PostgreSQL's plan-tree + * output. + * + *
<p>
+ * The audit unit tests build their fixture plans as literal JSON text — a Java + * constant cannot sit inside a JSON string literal — so these constants are used + * by the production code only, not the tests. + */ +final class PlanJson { + // EXPLAIN JSON field names. + static final String NODE_TYPE = "Node Type"; + static final String RELATION_NAME = "Relation Name"; + static final String PLANS = "Plans"; + static final String PARENT_RELATIONSHIP = "Parent Relationship"; + static final String FILTER = "Filter"; + static final String SORT_KEY = "Sort Key"; + static final String HASH_COND = "Hash Cond"; + static final String MERGE_COND = "Merge Cond"; + static final String JOIN_FILTER = "Join Filter"; + + // "Node Type" values the audits detect. + static final String SEQ_SCAN = "Seq Scan"; + static final String SORT = "Sort"; + static final String INCREMENTAL_SORT = "Incremental Sort"; + static final String HASH_JOIN = "Hash Join"; + static final String MERGE_JOIN = "Merge Join"; + static final String NESTED_LOOP = "Nested Loop"; + static final String HASH = "Hash"; + static final String MATERIALIZE = "Materialize"; + static final String MEMOIZE = "Memoize"; + + // "Parent Relationship" value marking a join's inner (rescanned) child. 
+ static final String INNER = "Inner"; + + private PlanJson() { + } +} diff --git a/src/main/java/io/github/databaseaudits/audit/runtime/plan/WhereClauseIndexAudit.java b/src/main/java/io/github/databaseaudits/audit/runtime/plan/WhereClauseIndexAudit.java index 0f28dfd..ae3f800 100644 --- a/src/main/java/io/github/databaseaudits/audit/runtime/plan/WhereClauseIndexAudit.java +++ b/src/main/java/io/github/databaseaudits/audit/runtime/plan/WhereClauseIndexAudit.java @@ -70,13 +70,14 @@ protected void collectFindings(final JsonNode node, private void addFilteredSeqScan(final JsonNode node, final List findings, final Set excludedRelations) { - if ("Seq Scan".equals(queryPlanExplainer.textOf(node, "Node Type")) - && node.hasNonNull("Filter")) { + if (PlanJson.SEQ_SCAN + .equals(queryPlanExplainer.textOf(node, PlanJson.NODE_TYPE)) + && node.hasNonNull(PlanJson.FILTER)) { final String relation = - queryPlanExplainer.textOf(node, "Relation Name"); + queryPlanExplainer.textOf(node, PlanJson.RELATION_NAME); if (relation == null || !excludedRelations.contains(relation)) { findings.add("Seq Scan on '" + relation + "' filtering " - + queryPlanExplainer.textOf(node, "Filter")); + + queryPlanExplainer.textOf(node, PlanJson.FILTER)); } } } diff --git a/src/main/java/io/github/databaseaudits/catalog/IndexCatalog.java b/src/main/java/io/github/databaseaudits/catalog/IndexCatalog.java index 6ccd491..0f58f6a 100644 --- a/src/main/java/io/github/databaseaudits/catalog/IndexCatalog.java +++ b/src/main/java/io/github/databaseaudits/catalog/IndexCatalog.java @@ -29,69 +29,6 @@ public class IndexCatalog { private final CatalogQueries catalogQueries; private final DatabasePlatform platform; - /** - * pg_catalog, not information_schema (which has no index views). - * {@code indkey} entries of {@code 0} are expression parts; the LEFT JOIN - * turns them into NULL column names. Invalid indexes (failed concurrent - * builds) are skipped, as are INCLUDE columns ({@code > indnkeyatts}). 
- */ - private static final String POSTGRESQL_SQL = - """ - SELECT t.relname AS table_name, - ic.relname AS index_name, - i.indisunique AS is_unique, - i.indisprimary AS is_primary, - (i.indpred IS NOT NULL) AS is_partial, - a.attname AS column_name - FROM pg_index i - JOIN pg_class t ON t.oid = i.indrelid - JOIN pg_class ic ON ic.oid = i.indexrelid - CROSS JOIN LATERAL unnest(string_to_array(i.indkey::text, ' ')::int2[]) - WITH ORDINALITY AS k(attnum, ordinal) - LEFT JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = k.attnum - WHERE t.relnamespace = ?::regnamespace - AND i.indisvalid - AND k.ordinal <= i.indnkeyatts - ORDER BY 1, 2, k.ordinal - """; - - /** - * A prefix part ({@code sub_part} set) indexes only the leading bytes of - * the column, so it is mapped to NULL like an expression part — it cannot - * be relied on to cover full-column lookups. - */ - private static final String MYSQL_SQL = - """ - SELECT s.table_name AS table_name, - s.index_name AS index_name, - (s.non_unique = 0) AS is_unique, - (s.index_name = 'PRIMARY') AS is_primary, - FALSE AS is_partial, - CASE WHEN s.sub_part IS NULL THEN s.column_name END AS column_name - FROM information_schema.statistics s - WHERE s.table_schema = ? - AND s.index_type NOT IN ('FULLTEXT', 'SPATIAL') - ORDER BY 1, 2, s.seq_in_index - """; - - private static final String H2_SQL = """ - SELECT ic.table_name AS table_name, - ic.index_name AS index_name, - (i.index_type_name = 'PRIMARY KEY' - OR i.index_type_name LIKE '%UNIQUE%') AS is_unique, - (i.index_type_name = 'PRIMARY KEY') AS is_primary, - FALSE AS is_partial, - ic.column_name AS column_name - FROM information_schema.index_columns ic - JOIN information_schema.indexes i - ON i.index_schema = ic.index_schema - AND i.index_name = ic.index_name - AND i.table_name = ic.table_name - WHERE ic.table_schema = ? 
- AND i.index_type_name <> 'SPATIAL INDEX' - ORDER BY 1, 2, ic.ordinal_position - """; - /** * Returns every index of {@code schema} with its key columns in index * order, via the platform's catalog SQL. @@ -111,11 +48,7 @@ public List readAll(final String schema) { * @return The platform-specific SQL string. */ public String sql() { - return switch (platform) { - case POSTGRESQL -> POSTGRESQL_SQL; - case MYSQL, MARIADB -> MYSQL_SQL; - case H2 -> H2_SQL; - }; + return platform.catalogDialect().indexCatalogSql(); } /** diff --git a/src/main/java/io/github/databaseaudits/platform/CatalogDialect.java b/src/main/java/io/github/databaseaudits/platform/CatalogDialect.java new file mode 100644 index 0000000..7cee6a3 --- /dev/null +++ b/src/main/java/io/github/databaseaudits/platform/CatalogDialect.java @@ -0,0 +1,99 @@ +package io.github.databaseaudits.platform; + +/** + * The per-engine catalog SQL the catalog audits run. Each {@link DatabasePlatform} + * holds one dialect ({@link DatabasePlatform#catalogDialect()}); the catalog audits + * and {@link io.github.databaseaudits.catalog.IndexCatalog} ask the platform's dialect + * for their SQL instead of switching on the platform. + * + *
<p>
+ * The three abstract methods return SQL that genuinely diverges between engines + * — PostgreSQL's {@code pg_catalog}, MySQL/MariaDB's {@code information_schema.statistics} + * and {@code key_column_usage}, H2's {@code information_schema}. A new engine's dialect + * will not compile until it supplies all three, so the compiler enforces coverage the way + * the old exhaustive {@code switch}es did. The two default methods return the + * standard {@code information_schema} SQL every supported engine shares; an engine with the + * standard layout inherits them unchanged. + * + *
<p>
+ * To add an engine, add a {@link DatabasePlatform} constant with a {@code CatalogDialect} + * — a new implementation for a divergent catalog, or an existing one (MariaDB reuses + * {@link MysqlCatalogDialect}). + */ +public interface CatalogDialect { + /** + * Returns the SQL that reads every index of a schema with its key columns in index + * order (one row per key column). Diverges per engine. + * + * @return the index-catalog SQL. + */ + String indexCatalogSql(); + + /** + * Returns the SQL that reads every foreign key of a schema — its columns in + * constraint order and its referenced table. Diverges per engine. + * + * @return the foreign-keys SQL. + */ + String foreignKeysSql(); + + /** + * Returns the SQL that reads every foreign key column of a schema paired with its + * declared type and its referenced column's declared type. Diverges per engine. + * + * @return the foreign-key-column-types SQL. + */ + String foreignKeyColumnTypesSql(); + + /** + * Returns the SQL that lists every base table of a schema with no {@code PRIMARY KEY}. + * Standard {@code information_schema}, valid as-is on PostgreSQL, MySQL, MariaDB, and H2. + * + * @return the tables-without-primary-key SQL. + */ + default String tablesWithoutPrimaryKeySql() { + return """ + SELECT t.table_name + FROM information_schema.tables t + WHERE t.table_schema = ? + AND t.table_type = 'BASE TABLE' + AND NOT EXISTS ( + SELECT 1 + FROM information_schema.table_constraints tc + WHERE tc.table_schema = t.table_schema + AND tc.table_name = t.table_name + AND tc.constraint_type = 'PRIMARY KEY' + ) + ORDER BY t.table_name + """; + } + + /** + * Returns the SQL that reads every nullable foreign key column of a schema. Standard + * {@code information_schema}, valid as-is on PostgreSQL, MySQL, MariaDB, and H2; the + * join includes {@code table_name} because constraint names are only unique per table + * on PostgreSQL and MySQL. + * + * @return the nullable-foreign-key-column SQL. 
+ */ + default String nullableForeignKeyColumnSql() { + return """ + SELECT kcu.table_name AS table_name, + kcu.constraint_name AS constraint_name, + kcu.column_name AS column_name + FROM information_schema.table_constraints tc + JOIN information_schema.key_column_usage kcu + ON kcu.constraint_schema = tc.constraint_schema + AND kcu.constraint_name = tc.constraint_name + AND kcu.table_name = tc.table_name + JOIN information_schema.columns col + ON col.table_schema = kcu.table_schema + AND col.table_name = kcu.table_name + AND col.column_name = kcu.column_name + WHERE tc.constraint_type = 'FOREIGN KEY' + AND tc.table_schema = ? + AND col.is_nullable = 'YES' + ORDER BY 1, 2, 3 + """; + } +} diff --git a/src/main/java/io/github/databaseaudits/platform/DatabasePlatform.java b/src/main/java/io/github/databaseaudits/platform/DatabasePlatform.java index a1e3da4..5a5f186 100644 --- a/src/main/java/io/github/databaseaudits/platform/DatabasePlatform.java +++ b/src/main/java/io/github/databaseaudits/platform/DatabasePlatform.java @@ -27,28 +27,48 @@ * {@code DatabaseAuditTestConfiguration} does exactly that). * *
<p>
- * To add a platform, add an enum value: every per-audit SQL {@code switch} is - * exhaustive with no {@code default}, so the compiler then flags each place - * that needs SQL for the new platform. + * To add a platform, add an enum value with its {@link CatalogDialect}: the + * constant's constructor requires one, and a divergent dialect's abstract + * methods will not compile until they supply the per-engine SQL — so the + * compiler still flags each place that needs SQL for the new platform (an + * engine with the standard {@code information_schema} layout reuses an existing + * dialect or inherits the shared default SQL). */ public enum DatabasePlatform { /** H2 2.x (the 1.x information_schema had a different layout). */ - H2, + H2(new H2CatalogDialect()), /** * MariaDB 10.6+. (Connecting to MariaDB through MySQL Connector/J detects * as {@link #MYSQL} — same SQL.) */ - MARIADB, + MARIADB(new MysqlCatalogDialect()), /** MySQL 8+. Aurora MySQL reports as MySQL. */ - MYSQL, + MYSQL(new MysqlCatalogDialect()), /** * PostgreSQL 11+ for the catalog audits, 16+ for the plan audits. Aurora * PostgreSQL reports as PostgreSQL. */ - POSTGRESQL; + POSTGRESQL(new PostgresqlCatalogDialect()); + + private final CatalogDialect catalogDialect; + + DatabasePlatform(final CatalogDialect catalogDialect) { + this.catalogDialect = catalogDialect; + } + + /** + * Returns this platform's catalog SQL dialect — the source of the catalog + * audits' and {@link io.github.databaseaudits.catalog.IndexCatalog}'s + * per-engine SQL. + * + * @return this platform's catalog dialect. 
+ */ + public CatalogDialect catalogDialect() { + return catalogDialect; + } private static final String FAILED_OBTAINING_DB_FROM_DATA_SOURCE_MSG = "Could not read the database product name from the DataSource"; diff --git a/src/main/java/io/github/databaseaudits/platform/H2CatalogDialect.java b/src/main/java/io/github/databaseaudits/platform/H2CatalogDialect.java new file mode 100644 index 0000000..5f8ce93 --- /dev/null +++ b/src/main/java/io/github/databaseaudits/platform/H2CatalogDialect.java @@ -0,0 +1,110 @@ +package io.github.databaseaudits.platform; + +/** + * The {@link CatalogDialect} for H2 2.x, reading from its {@code information_schema} + * (the 1.x layout differed). + */ +public final class H2CatalogDialect implements CatalogDialect { + @Override + public String indexCatalogSql() { + return """ + SELECT ic.table_name AS table_name, + ic.index_name AS index_name, + (i.index_type_name = 'PRIMARY KEY' + OR i.index_type_name LIKE '%UNIQUE%') AS is_unique, + (i.index_type_name = 'PRIMARY KEY') AS is_primary, + FALSE AS is_partial, + ic.column_name AS column_name + FROM information_schema.index_columns ic + JOIN information_schema.indexes i + ON i.index_schema = ic.index_schema + AND i.index_name = ic.index_name + AND i.table_name = ic.table_name + WHERE ic.table_schema = ? + AND i.index_type_name <> 'SPATIAL INDEX' + ORDER BY 1, 2, ic.ordinal_position + """; + } + + /** + * Standard information_schema; constraint names are unique per schema on + * H2, so the joins are exact. 
+ */ + @Override + public String foreignKeysSql() { + return """ + SELECT tc.table_name AS table_name, + tc.constraint_name AS constraint_name, + ref_tc.table_name AS referenced_table, + kcu.column_name AS column_name + FROM information_schema.table_constraints tc + JOIN information_schema.key_column_usage kcu + ON kcu.constraint_schema = tc.constraint_schema + AND kcu.constraint_name = tc.constraint_name + AND kcu.table_name = tc.table_name + JOIN information_schema.referential_constraints rc + ON rc.constraint_schema = tc.constraint_schema + AND rc.constraint_name = tc.constraint_name + LEFT JOIN information_schema.table_constraints ref_tc + ON ref_tc.constraint_schema = rc.unique_constraint_schema + AND ref_tc.constraint_name = rc.unique_constraint_name + WHERE tc.constraint_type = 'FOREIGN KEY' + AND tc.table_schema = ? + ORDER BY 1, 2, kcu.ordinal_position + """; + } + + /** + * Standard information_schema: {@code position_in_unique_constraint} maps + * each FK column to the referenced unique/PK constraint's column at that + * position. The declared type is composed from {@code data_type} plus the + * character length for text types and, for the decimal family (radix 10), + * the precision and scale — so {@code DECIMAL(10,2)} and {@code DECIMAL(5,0)} + * render distinctly instead of both collapsing to a bare {@code NUMERIC} + * (which would hide the mismatch that PostgreSQL's {@code format_type} and + * MySQL's {@code column_type} both catch). The radix-2 integer types keep + * their bare {@code data_type}, and {@code '(' || NULL || ')'} concatenates + * to NULL so COALESCE drops the length where it does not apply. 
+ */ + @Override + public String foreignKeyColumnTypesSql() { + return """ + SELECT tc.table_name AS table_name, + tc.constraint_name AS constraint_name, + kcu.column_name AS column_name, + col.data_type || COALESCE('(' || col.character_maximum_length || ')', '') + || CASE WHEN col.numeric_precision_radix = 10 + THEN '(' || col.numeric_precision || ',' || col.numeric_scale || ')' + ELSE '' END AS column_type, + ref_kcu.table_name AS referenced_table, + ref_kcu.column_name AS referenced_column, + rcol.data_type || COALESCE('(' || rcol.character_maximum_length || ')', '') + || CASE WHEN rcol.numeric_precision_radix = 10 + THEN '(' || rcol.numeric_precision || ',' || rcol.numeric_scale || ')' + ELSE '' END AS referenced_type + FROM information_schema.table_constraints tc + JOIN information_schema.key_column_usage kcu + ON kcu.constraint_schema = tc.constraint_schema + AND kcu.constraint_name = tc.constraint_name + AND kcu.table_name = tc.table_name + JOIN information_schema.referential_constraints rc + ON rc.constraint_schema = tc.constraint_schema + AND rc.constraint_name = tc.constraint_name + JOIN information_schema.key_column_usage ref_kcu + ON ref_kcu.constraint_schema = rc.unique_constraint_schema + AND ref_kcu.constraint_name = rc.unique_constraint_name + AND ref_kcu.ordinal_position = kcu.position_in_unique_constraint + JOIN information_schema.columns col + ON col.table_schema = kcu.table_schema + AND col.table_name = kcu.table_name + AND col.column_name = kcu.column_name + JOIN information_schema.columns rcol + ON rcol.table_schema = ref_kcu.table_schema + AND rcol.table_name = ref_kcu.table_name + AND rcol.column_name = ref_kcu.column_name + WHERE tc.constraint_type = 'FOREIGN KEY' + AND tc.table_schema = ? 
+ ORDER BY 1, 2, kcu.ordinal_position + """; + } +} diff --git a/src/main/java/io/github/databaseaudits/platform/MysqlCatalogDialect.java b/src/main/java/io/github/databaseaudits/platform/MysqlCatalogDialect.java new file mode 100644 index 0000000..5731006 --- /dev/null +++ b/src/main/java/io/github/databaseaudits/platform/MysqlCatalogDialect.java @@ -0,0 +1,76 @@ +package io.github.databaseaudits.platform; + +/** + * The {@link CatalogDialect} for MySQL, reading from {@code information_schema}. + * {@link DatabasePlatform#MARIADB} reuses it — the catalog SQL is identical; a + * future MariaDB divergence becomes one overriding subclass. + */ +public final class MysqlCatalogDialect implements CatalogDialect { + /** + * A prefix part ({@code sub_part} set) indexes only the leading bytes of + * the column, so it is mapped to NULL like an expression part — it cannot + * be relied on to cover full-column lookups. + */ + @Override + public String indexCatalogSql() { + return """ + SELECT s.table_name AS table_name, + s.index_name AS index_name, + (s.non_unique = 0) AS is_unique, + (s.index_name = 'PRIMARY') AS is_primary, + FALSE AS is_partial, + CASE WHEN s.sub_part IS NULL THEN s.column_name END AS column_name + FROM information_schema.statistics s + WHERE s.table_schema = ? + AND s.index_type NOT IN ('FULLTEXT', 'SPATIAL') + ORDER BY 1, 2, s.seq_in_index + """; + } + + /** + * key_column_usage carries the referenced table directly on MySQL/MariaDB. + */ + @Override + public String foreignKeysSql() { + return """ + SELECT k.table_name AS table_name, + k.constraint_name AS constraint_name, + k.referenced_table_name AS referenced_table, + k.column_name AS column_name + FROM information_schema.key_column_usage k + WHERE k.table_schema = ? 
+ AND k.referenced_table_name IS NOT NULL + ORDER BY 1, 2, k.ordinal_position + """; + } + + /** + * key_column_usage carries the referenced table and column directly on + * MySQL/MariaDB; {@code column_type} is the full declared type including + * length and signedness (e.g. {@code varchar(10)}, {@code bigint unsigned}). + */ + @Override + public String foreignKeyColumnTypesSql() { + return """ + SELECT k.table_name AS table_name, + k.constraint_name AS constraint_name, + k.column_name AS column_name, + col.column_type AS column_type, + k.referenced_table_name AS referenced_table, + k.referenced_column_name AS referenced_column, + rcol.column_type AS referenced_type + FROM information_schema.key_column_usage k + JOIN information_schema.columns col + ON col.table_schema = k.table_schema + AND col.table_name = k.table_name + AND col.column_name = k.column_name + JOIN information_schema.columns rcol + ON rcol.table_schema = k.referenced_table_schema + AND rcol.table_name = k.referenced_table_name + AND rcol.column_name = k.referenced_column_name + WHERE k.table_schema = ? + AND k.referenced_table_name IS NOT NULL + ORDER BY 1, 2, k.ordinal_position + """; + } +} diff --git a/src/main/java/io/github/databaseaudits/platform/PostgresqlCatalogDialect.java b/src/main/java/io/github/databaseaudits/platform/PostgresqlCatalogDialect.java new file mode 100644 index 0000000..725d4bf --- /dev/null +++ b/src/main/java/io/github/databaseaudits/platform/PostgresqlCatalogDialect.java @@ -0,0 +1,80 @@ +package io.github.databaseaudits.platform; + +/** + * The {@link CatalogDialect} for PostgreSQL, reading from {@code pg_catalog}. + */ +public final class PostgresqlCatalogDialect implements CatalogDialect { + /** + * pg_catalog, not information_schema (which has no index views). + * {@code indkey} entries of {@code 0} are expression parts; the LEFT JOIN + * turns them into NULL column names. 
Invalid indexes (failed concurrent + * builds) are skipped, as are INCLUDE columns ({@code > indnkeyatts}). + */ + @Override + public String indexCatalogSql() { + return """ + SELECT t.relname AS table_name, + ic.relname AS index_name, + i.indisunique AS is_unique, + i.indisprimary AS is_primary, + (i.indpred IS NOT NULL) AS is_partial, + a.attname AS column_name + FROM pg_index i + JOIN pg_class t ON t.oid = i.indrelid + JOIN pg_class ic ON ic.oid = i.indexrelid + CROSS JOIN LATERAL unnest(string_to_array(i.indkey::text, ' ')::int2[]) + WITH ORDINALITY AS k(attnum, ordinal) + LEFT JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = k.attnum + WHERE t.relnamespace = ?::regnamespace + AND i.indisvalid + AND k.ordinal <= i.indnkeyatts + ORDER BY 1, 2, k.ordinal + """; + } + + @Override + public String foreignKeysSql() { + return """ + SELECT cl.relname AS table_name, + c.conname AS constraint_name, + ref.relname AS referenced_table, + a.attname AS column_name + FROM pg_constraint c + JOIN pg_class cl ON cl.oid = c.conrelid + JOIN pg_class ref ON ref.oid = c.confrelid + CROSS JOIN LATERAL unnest(c.conkey) WITH ORDINALITY AS k(attnum, ordinal) + JOIN pg_attribute a ON a.attrelid = c.conrelid AND a.attnum = k.attnum + WHERE c.contype = 'f' + AND c.connamespace = ?::regnamespace + ORDER BY 1, 2, k.ordinal + """; + } + + /** + * pg_catalog pairs each FK column with its referenced column positionally + * via {@code conkey}/{@code confkey}; {@code format_type} renders the full + * declared type (with modifiers, e.g. {@code character varying(10)}). 
+ */ + @Override + public String foreignKeyColumnTypesSql() { + return """ + SELECT cl.relname AS table_name, + c.conname AS constraint_name, + a.attname AS column_name, + format_type(a.atttypid, a.atttypmod) AS column_type, + ref.relname AS referenced_table, + ra.attname AS referenced_column, + format_type(ra.atttypid, ra.atttypmod) AS referenced_type + FROM pg_constraint c + JOIN pg_class cl ON cl.oid = c.conrelid + JOIN pg_class ref ON ref.oid = c.confrelid + CROSS JOIN LATERAL unnest(c.conkey, c.confkey) + WITH ORDINALITY AS k(attnum, refattnum, ordinal) + JOIN pg_attribute a ON a.attrelid = c.conrelid AND a.attnum = k.attnum + JOIN pg_attribute ra ON ra.attrelid = c.confrelid AND ra.attnum = k.refattnum + WHERE c.contype = 'f' + AND c.connamespace = ?::regnamespace + ORDER BY 1, 2, k.ordinal + """; + } +} diff --git a/src/site/asciidoc/adding-a-database.adoc b/src/site/asciidoc/adding-a-database.adoc new file mode 100644 index 0000000..b5ca7aa --- /dev/null +++ b/src/site/asciidoc/adding-a-database.adoc @@ -0,0 +1,212 @@ += Adding a Database Platform + +This guide walks through adding support for a new database engine to `database-audits-core`. The design that +makes this a small, compiler-guided change — the per-engine `CatalogDialect` behind each `DatabasePlatform` — is +described in link:architecture.html[Architecture]; this page is the step-by-step. + +== What "a platform" is + +A platform is one `DatabasePlatform` enum constant paired with a `CatalogDialect` — the object that supplies the +catalog audits' per-engine SQL. The catalog audits (`PrimaryKeyPresenceAudit`, `ForeignKeyIndexAudit`, +`ForeignKeyNotNullAudit`, `ForeignKeyTypeMatchAudit`, `RedundantIndexAudit`) and `IndexCatalog` never switch on +the platform; they ask `platform.catalogDialect()` for the SQL they run: + +[source,java] +---- +return platform.catalogDialect().foreignKeysSql(); +---- + +So adding an engine is three moves: + +. Supply its catalog SQL as a `CatalogDialect` +. 
+ Add the enum constant +. Teach detection to recognize the engine's product name. + +The plan-based runtime audits (`WhereClauseIndexAudit`, `OrderByIndexAudit`, `JoinIndexAudit`) play no part — +they are PostgreSQL-only by design; see <<plan-audits>>. + +== Before you start + +* The engine must expose catalog metadata your SQL can read — the standard `information_schema`, or an + engine-specific catalog such as PostgreSQL's `pg_catalog`. +* Decide whether the engine's catalog SQL matches an existing dialect. `MysqlCatalogDialect` and + `H2CatalogDialect` are the two shapes already covered (the `information_schema.statistics`/`key_column_usage` + layout and the standard `information_schema` layout). If your engine is wire-compatible with one of them, you + may reuse it rather than write a new dialect — as `MARIADB` reuses `MysqlCatalogDialect`. + +[#step-dialect] +== Step 1 — Provide a `CatalogDialect` + +`CatalogDialect` declares three *abstract* methods (SQL that genuinely diverges between engines) and two +*default* methods (standard `information_schema` SQL every supported engine shares). A new dialect must +implement the three abstract methods; it inherits the two defaults unless its `information_schema` diverges. + +Each statement takes exactly one bind parameter — the schema name (`?`) — and its result is read *by column +alias, case-insensitively* (`CatalogQueries` returns case-insensitive row maps, because PostgreSQL lower-cases +unquoted aliases while H2 upper-cases them). So the aliases below are the contract; the source column names and +catalog views are yours to choose. + +[cols="1,3", options="header"] +|=== +| Method | Must project (one row per …), aliased exactly + +| `indexCatalogSql()` + +_(abstract)_ +| One row per index **key column**, ordered by table, index, key position: `table_name`, `index_name`, +`is_unique`, `is_primary`, `is_partial`, `column_name`. 
Exclude full-text/spatial indexes and non-key +`INCLUDE` columns; map a prefix-only or expression key part to a NULL `column_name` (it cannot cover a +full-column lookup). Boolean flags may be SQL `BOOLEAN` or `0`/`1` — `IndexCatalog` accepts either. + +| `foreignKeysSql()` + +_(abstract)_ +| One row per foreign-key **column**, ordered by table, constraint, column position: `table_name`, +`constraint_name`, `referenced_table`, `column_name`. + +| `foreignKeyColumnTypesSql()` + +_(abstract)_ +| One row per foreign-key **column**, ordered by table, constraint, column position: `table_name`, +`constraint_name`, `column_name`, `column_type`, `referenced_table`, `referenced_column`, `referenced_type`. +The two `*_type` values must render the **fully-qualified declared type** — length, precision, and scale — so a +real mismatch (`varchar(10)` vs `varchar(20)`, `DECIMAL(10,2)` vs `DECIMAL(5,0)`) is visible rather than +collapsing to a bare type name. + +| `tablesWithoutPrimaryKeySql()` + +_(default)_ +| One row per base table with no `PRIMARY KEY`: `table_name`. Standard `information_schema`; override only if +the engine's differs. + +| `nullableForeignKeyColumnSql()` + +_(default)_ +| One row per nullable foreign-key column: `table_name`, `constraint_name`, `column_name`. Standard +`information_schema`; override only if the engine's differs. +|=== + +The class itself is a plain `final` implementation of the interface: + +[source,java] +---- +package io.github.databaseaudits.platform; + +/** The {@link CatalogDialect} for CockroachDB, reading from its {@code information_schema}. */ +public final class CockroachdbCatalogDialect implements CatalogDialect { + @Override + public String indexCatalogSql() { + return """ + SELECT ... AS table_name, + ... AS index_name, + ... AS is_unique, + ... AS is_primary, + ... AS is_partial, + ... AS column_name + FROM ... + WHERE ... = ? + ORDER BY 1, 2, ... 
+ """; + } + + @Override + public String foreignKeysSql() { + return """ + SELECT ... AS table_name, + ... AS constraint_name, + ... AS referenced_table, + ... AS column_name + FROM ... + WHERE ... = ? + ORDER BY 1, 2, ... + """; + } + + @Override + public String foreignKeyColumnTypesSql() { + return """ + SELECT ... -- table_name, constraint_name, column_name, column_type, + ... -- referenced_table, referenced_column, referenced_type + FROM ... + WHERE ... = ? + ORDER BY 1, 2, ... + """; + } + + // tablesWithoutPrimaryKeySql() and nullableForeignKeyColumnSql() are inherited from + // CatalogDialect — override only if this engine's information_schema diverges. +} +---- + +*Tip:* Copy the closest existing dialect rather than starting blank. `H2CatalogDialect` is the reference for the +standard `information_schema` layout (`table_constraints` / `key_column_usage` / `referential_constraints`); +`MysqlCatalogDialect` is the reference for the `information_schema.statistics` + directly-referenced-table +layout; `PostgresqlCatalogDialect` is the reference for a fully engine-specific catalog (`pg_catalog`). + +[#step-constant] +== Step 2 — Add the `DatabasePlatform` constant + +Add an enum constant, passing the dialect — a new one, or an existing one you are reusing: + +[source,java] +---- +/** CockroachDB 23+. */ +COCKROACHDB(new CockroachdbCatalogDialect()), +---- + +The constant's constructor *requires* a dialect, and a new dialect will not compile until it implements all +three abstract methods — so the compiler flags every place that still needs SQL for the new engine, the same +completeness the old exhaustive `switch`es enforced, while the shared `information_schema` SQL is written once. + +[#step-detect] +== Step 3 — Teach detection to recognize the engine + +`DatabasePlatform.fromProductName(String)` maps the JDBC `DatabaseMetaData.getDatabaseProductName() +` (matched case-insensitively as a substring) to a constant. 
Add a branch: + +[source,java] +---- +} else if (name.contains("cockroach")) { + databasePlatform = COCKROACHDB; +---- + +`fromDataSource(DataSource)` — which the Spring integration calls once at startup — opens one connection, reads +the product name, and delegates here, so this single branch wires auto-detection everywhere. Keep the +unsupported-platform message's supported-list in sync. + +*Note:* A product that reports an *existing* product's name needs no new branch. Aurora PostgreSQL reports as +`PostgreSQL`; MariaDB reached through MySQL Connector/J reports as `MySQL`. Add a branch only for a genuinely +new product string. + +[#plan-audits] +== The plan audits stay PostgreSQL-only + +`WhereClauseIndexAudit`, `OrderByIndexAudit`, and `JoinIndexAudit` read query plans from +`EXPLAIN (GENERIC_PLAN, FORMAT JSON)` with planner-penalty GUCs. No other engine offers a parameter-free +generic-plan EXPLAIN, so these audits require `POSTGRESQL` and *fail fast* (rather than pass vacuously) on any +other platform. Adding a new engine gives it the catalog family, the JPA audit, and the capture-scan +`UnconditionalMutationAudit` — but not the plan audits, and that needs no action from you: they check the +platform themselves. (Teaching them a second engine would be a much larger change — a new generic-plan EXPLAIN +strategy — not part of adding a platform.) + +== Step 4 — Tests + +Mirror the tests that accompany the existing dialects: + +* **`CatalogDialectTest`** — a unit assertion that the new dialect returns non-blank SQL for every method and + that each statement carries its single `?` parameter and the aliases the audits read. +* **`CatalogDialectIT`** — an integration test that runs the dialect's SQL against a real instance of the + engine (a Testcontainers container, or the in-memory engine as `H2CatalogDialectIT` does) over a known schema, + asserting the projected rows. This is where a wrong alias, join, or ordering surfaces. 
+* **`DatabasePlatformTest`** — extend the detection cases so the new product name maps to the new constant. + +== Checklist + +* [ ] `CatalogDialect` implementation (or an existing dialect reused) covering the three abstract methods. +* [ ] `DatabasePlatform` constant wired to the dialect, with a doc comment naming the supported version range. +* [ ] `fromProductName` branch, and the supported-list in its error message updated. +* [ ] `CatalogDialectTest`, an engine `…CatalogDialectIT`, and `DatabasePlatformTest` updated. +* [ ] link:audits.html[Audits] / link:architecture.html[Architecture] platform lists updated if they enumerate engines. + +== Downstream: the Spring integration + +Once core recognizes the engine, the Spring integration audits it with **no code change** — +`DatabaseAuditTestConfiguration` detects the platform from the live `DataSource` at runtime. To let the +archetype *generate* a runnable demo harness for the new engine, follow the integration's +link:https://database-audits.github.io/spring-boot-integration/adding-a-database.html[Adding a Database Platform] +guide. diff --git a/src/site/asciidoc/adding-an-audit.adoc b/src/site/asciidoc/adding-an-audit.adoc new file mode 100644 index 0000000..3f8b55b --- /dev/null +++ b/src/site/asciidoc/adding-an-audit.adoc @@ -0,0 +1,161 @@ += Adding an Audit + +This guide walks through adding a new audit to `database-audits-core`. The families, their collaborators, and +the design rules an audit follows are described in link:architecture.html[Architecture]; this page is the +step-by-step for each family. + +== The shape of every audit + +An audit is a plain class — constructor injection, no dependency-injection annotations — whose `audit(...)` +method **returns a `List` of human-readable violations, empty when clean**. The audit never asserts and +never throws for a *finding*; the caller asserts. 
It throws only when it could not actually check anything (an +empty SQL capture, an unsupported platform), so a misconfigured run fails loudly instead of passing vacuously. +See *"Audits return findings; callers assert."* and *"Never pass vacuously."* in link:architecture.html[Architecture]. + +Pick the family your check belongs to — each has a recipe below: + +[cols="1,3", options="header"] +|=== +| Family | Use when your check reads … +| <<catalog>> | database metadata (`information_schema` / `pg_catalog`) — deterministic, no test data needed. +| <<plan>> | the *plans* of the SQL the app ran — to prove an access path has no serving index (PostgreSQL-only). +| <<capture>> | the *text* of the SQL the app ran — e.g. a dangerous statement shape. +| <<jpa>> | Hibernate's mapping model against the live schema. +|=== + +[#catalog] +== Catalog audit + +A catalog audit takes `CatalogQueries` (the JDBC-to-list-of-maps layer) plus whatever it needs to obtain SQL and +compare results: the `DatabasePlatform` when it runs per-engine catalog SQL, and/or `IndexCatalog` when it works +from indexes. `PrimaryKeyPresenceAudit` is the minimal example: + +[source,java] +---- +@AllArgsConstructor +public class XxxAudit { + private final CatalogQueries catalogQueries; + private final DatabasePlatform platform; + + String sql() { + return platform.catalogDialect().xxxSql(); // per-engine SQL from the dialect + } + + /** Returns one finding per offending row; an empty list when clean. 
*/ + public List<String> audit(final String schema, final Set<String> excluded) { + return catalogQueries.queryForList(sql(), schema).stream() + .map(row -> String.valueOf(row.get("some_column"))) + .filter(finding -> !excluded.contains(finding)) + .toList(); + } +} +---- + +If the audit needs metadata no existing query returns, add a method to `CatalogDialect` — *abstract* if the SQL +diverges between engines, *default* if the standard `information_schema` SQL serves every engine (see +link:adding-a-database.html#step-dialect[Adding a Database — Step 1]). Read result columns *by alias, +case-insensitively*. Index-based audits (`ForeignKeyIndexAudit`, `RedundantIndexAudit`) take `IndexCatalog` +instead of writing their own SQL, and do the leading-prefix / containment comparison in plain Java where it is +unit-testable and platform-independent. + +[#plan] +== Runtime plan audit (PostgreSQL-only) + +The three EXPLAIN-driven audits share `CapturedSqlPlanAuditTemplate`, which owns the fixed algorithm — read the +capture, de-duplicate by statement shape, `EXPLAIN` each candidate with penalties applied, collect offending +nodes — *and* both vacuous-run guards (empty capture, wholly-unexplainable run). 
A new plan audit lives beside the template in the +`audit.runtime.plan` package (the template is package-private), extends it, and supplies only the four variation +points: + +[source,java] +---- +@Slf4j +public class XxxIndexAudit extends CapturedSqlPlanAuditTemplate { + public XxxIndexAudit(final QueryPlanExplainer explainer, + final SqlCapturingStatementInspector capturer) { + super(explainer, capturer); + } + + @Override + protected boolean isCandidate(final String upperCasedSql) { + return upperCasedSql.contains("..."); // which statements to EXPLAIN + } + + @Override + protected String[] plannerSettings() { + return new String[] { "enable_seqscan = off" }; // GUCs to penalize + } + + @Override + protected void collectFindings(final JsonNode plan, final List findings, + final Set excludedRelations) { + // walk the plan; add a finding for each surviving penalized node. + // firstRelationName(...) and collectChildFindings(...) are provided by the base. + } + + @Override + protected String statementNoun() { + return "XXX"; // for the vacuous-run guard message + } +} +---- + +You do not re-implement capture reading, de-duplication, the empty-capture guard, or the all-skipped guard — +they live in the template exactly once. Requires `preferQueryMode=simple` on the JDBC URL (generic-plan EXPLAIN +only works over the simple query protocol); the base's guard message says so when a run explains nothing. + +[#capture] +== Runtime capture-scan audit + +An audit that inspects the *text* rather than the *plan* of captured SQL takes only the +`SqlCapturingStatementInspector`, reads `capturedSql()`, and scans — `UnconditionalMutationAudit` is the model. 
+Guard the empty capture yourself so the audit never passes vacuously: + +[source,java] +---- +public List audit(final Set excludedStatements) { + final Set captured = sqlCapturer.capturedSql(); + if (captured.isEmpty()) { + throw new IllegalStateException(SqlCapturingStatementInspector.EMPTY_CAPTURE_MESSAGE); + } + return captured.stream()...toList(); +} +---- + +[#jpa] +== JPA mapping audit + +JPA audits validate Hibernate's boot mapping model against the live schema. `SchemaEntityValidationAudit` is the +sole example; it obtains the mapping model from a `MappingMetadataIntegrator` captured during bootstrap plus a +`DataSource`, and is built through a static factory (`forEntityManagerFactory(emf, dataSource)`) rather than a +public constructor. A new JPA audit follows the same route. This is the rarest family — reach for it only when +the check is genuinely about entity-to-schema mapping. + +== The standing directive — update the Spring beans in lockstep + +`database-audits-core` deliberately ships no Spring wiring; `database-audits-spring-boot` supplies it. Its +`DatabaseAuditSuite` calls every core audit constructor directly, and a paired `AuditAssertion` exposes +each audit's findings as a test assertion. + +*Important:* A new audit (or any change to an existing audit's constructor or public `audit(...)` signature) must +be matched in the Spring module. A compile failure there against core is the intended signal. The integration's +link:https://database-audits.github.io/spring-boot-integration/adding-an-audit.html[Adding an Audit] guide is the +other half of this recipe — and its roster-guard test fails the build if a new core audit has no wired +assertion, so the two stay in step. + +== Tests + +* A **`Test`** unit test over the finding logic — feed rows (catalog), plan JSON (plan), or captured + statements (capture) and assert the returned `List`. Assert whole lists with AssertJ. 
+* For a catalog audit, an **`…IT`** against a real engine (as the dialect ITs do) proves the SQL and the mapping + agree end-to-end. +* Cover both a clean run (empty list) and the cannot-run guard (the thrown `IllegalStateException` / + `UnsupportedOperationException`), so the never-pass-vacuously contract is enforced. + +== Checklist + +* [ ] Audit class in the right `audit.*` package, returning `List<String>`, throwing only on a cannot-run condition. +* [ ] Any new `CatalogDialect` method (abstract vs default chosen per <<catalog>>). +* [ ] Unit test, and an IT where the audit runs real SQL; clean *and* cannot-run cases covered. +* [ ] link:audits.html[Audits] reference entry for the new audit (what it detects, finding format, exclusion type). +* [ ] Spring module updated in lockstep — see the integration's Adding an Audit guide. diff --git a/src/site/asciidoc/architecture.adoc b/src/site/asciidoc/architecture.adoc index 5952d61..d2b5b9b 100644 --- a/src/site/asciidoc/architecture.adoc +++ b/src/site/asciidoc/architecture.adoc @@ -17,21 +17,25 @@ Both errors indicate a configuration problem to fix, not a clean schema. == `DatabasePlatform` — the hub -`DatabasePlatform` is an enum of the four supported products: `H2`, `MARIADB`, `MYSQL`, `POSTGRESQL`. Every -place in the library that needs platform-specific SQL holds an exhaustive `switch` with *no `default` branch*: +`DatabasePlatform` is an enum of the four supported products: `H2`, `MARIADB`, `MYSQL`, `POSTGRESQL`. Each +constant holds a `CatalogDialect` — the source of the catalog audits' per-engine SQL — reached through +`platform.catalogDialect()`: [source,java] ---- -return switch (platform) { -case POSTGRESQL -> POSTGRESQL_FK_SQL; -case MYSQL, MARIADB -> MYSQL_FK_SQL; -case H2 -> H2_FK_SQL; -}; +return platform.catalogDialect().foreignKeysSql(); ---- -Adding a new enum value makes the compiler flag every SQL site that needs updating — a compile-time guarantee of -completeness. 
Detect the platform once at startup with `DatabasePlatform.fromDataSource(DataSource)`, which -opens one connection, reads `DatabaseMetaData.getDatabaseProductName()`, and closes the connection. +`CatalogDialect` declares an *abstract* method for each query whose SQL genuinely diverges between engines +(`indexCatalogSql`, `foreignKeysSql`, `foreignKeyColumnTypesSql` — PostgreSQL's `pg_catalog`, MySQL's +`information_schema.statistics`/`key_column_usage`, H2's `information_schema`) and a *default* method for each +query whose standard `information_schema` SQL every engine shares (`tablesWithoutPrimaryKeySql`, +`nullableForeignKeyColumnSql`). `PostgresqlCatalogDialect`, `MysqlCatalogDialect`, and `H2CatalogDialect` supply +the divergent SQL; `MARIADB` reuses `MysqlCatalogDialect`. Adding an engine means adding an enum constant with a +dialect: the constant's constructor requires one, and a divergent dialect will not compile until it implements +every abstract method — the same compile-time completeness the old exhaustive `switch`es gave, while the shared +SQL is written once. Detect the platform once at startup with `DatabasePlatform.fromDataSource(DataSource)`, +which opens one connection, reads `DatabaseMetaData.getDatabaseProductName()`, and closes the connection. == Catalog family (all platforms) diff --git a/src/site/asciidoc/audits.adoc b/src/site/asciidoc/audits.adoc index 4c8def3..6f22444 100644 --- a/src/site/asciidoc/audits.adoc +++ b/src/site/asciidoc/audits.adoc @@ -32,7 +32,7 @@ Every foreign key constraint must be backed by an index whose leading columns ar FK causes slow child→parent lookups and lock-heavy parent `DELETE`/`UPDATE` (sequential child scan under a strong lock). Pass constraint names to skip as `excludedConstraints`. -NOTE: PostgreSQL and H2 do not auto-create an index for a foreign key. MySQL/MariaDB InnoDB does, so this +*Note:* PostgreSQL and H2 do not auto-create an index for a foreign key. 
MySQL/MariaDB InnoDB does, so this audit usually passes there, though it still catches an index dropped after the fact on MariaDB (permitted while `foreign_key_checks` is suspended; MySQL refuses such drops outright). diff --git a/src/site/site.xml b/src/site/site.xml index bd2fb22..2c29bd9 100644 --- a/src/site/site.xml +++ b/src/site/site.xml @@ -36,6 +36,8 @@

+ + diff --git a/src/test/java/io/github/databaseaudits/platform/CatalogDialectTest.java b/src/test/java/io/github/databaseaudits/platform/CatalogDialectTest.java new file mode 100644 index 0000000..ad6576f --- /dev/null +++ b/src/test/java/io/github/databaseaudits/platform/CatalogDialectTest.java @@ -0,0 +1,61 @@ +package io.github.databaseaudits.platform; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.Test; + +class CatalogDialectTest { + @Test + void testDivergentSql_EachDialect_ReadsItsOwnCatalog() { + final CatalogDialect postgresql = new PostgresqlCatalogDialect(); + assertThat(postgresql.indexCatalogSql()) + .as("PostgreSQL reads indexes from pg_catalog.") + .contains("pg_index"); + assertThat(postgresql.foreignKeysSql()) + .as("PostgreSQL reads foreign keys from pg_catalog.") + .contains("pg_constraint"); + assertThat(postgresql.foreignKeyColumnTypesSql()) + .as("PostgreSQL renders column types with format_type.") + .contains("format_type"); + + final CatalogDialect mysql = new MysqlCatalogDialect(); + assertThat(mysql.indexCatalogSql()) + .as("MySQL reads indexes from information_schema.statistics.") + .contains("information_schema.statistics"); + assertThat(mysql.foreignKeysSql()) + .as("MySQL reads the referenced table from key_column_usage.") + .contains("referenced_table_name"); + assertThat(mysql.foreignKeyColumnTypesSql()) + .as("MySQL reads the referenced column from key_column_usage.") + .contains("referenced_column_name"); + + final CatalogDialect h2 = new H2CatalogDialect(); + assertThat(h2.indexCatalogSql()) + .as("H2 reads indexes from information_schema.index_columns.") + .contains("information_schema.index_columns"); + assertThat(h2.foreignKeysSql()) + .as("H2 resolves the referenced table via referential_constraints.") + .contains("referential_constraints"); + assertThat(h2.foreignKeyColumnTypesSql()) + .as("H2 pairs FK columns via position_in_unique_constraint.") + 
.contains("position_in_unique_constraint"); + } + + @Test + void testSharedSql_EveryDialect_UsesTheSameInformationSchemaSql() { + final CatalogDialect postgresql = new PostgresqlCatalogDialect(); + final CatalogDialect mysql = new MysqlCatalogDialect(); + final CatalogDialect h2 = new H2CatalogDialect(); + + assertThat(postgresql.tablesWithoutPrimaryKeySql()) + .as("The tables-without-primary-key SQL is the standard information_schema SQL shared by every engine.") + .contains("BASE TABLE") + .isEqualTo(mysql.tablesWithoutPrimaryKeySql()) + .isEqualTo(h2.tablesWithoutPrimaryKeySql()); + assertThat(postgresql.nullableForeignKeyColumnSql()) + .as("The nullable-foreign-key-column SQL is the standard information_schema SQL shared by every engine.") + .contains("is_nullable") + .isEqualTo(mysql.nullableForeignKeyColumnSql()) + .isEqualTo(h2.nullableForeignKeyColumnSql()); + } +} diff --git a/src/test/java/io/github/databaseaudits/platform/DatabasePlatformTest.java b/src/test/java/io/github/databaseaudits/platform/DatabasePlatformTest.java index f93ff8f..9768d7b 100644 --- a/src/test/java/io/github/databaseaudits/platform/DatabasePlatformTest.java +++ b/src/test/java/io/github/databaseaudits/platform/DatabasePlatformTest.java @@ -65,6 +65,22 @@ void testFromDataSource_ConnectionFailure_ThrowsIllegalState() .withMessageContaining("product name"); } + @Test + void testCatalogDialect_EachPlatform_HoldsItsDialectType() { + assertThat(DatabasePlatform.POSTGRESQL.catalogDialect()) + .as("PostgreSQL uses its own catalog dialect.") + .isInstanceOf(PostgresqlCatalogDialect.class); + assertThat(DatabasePlatform.MYSQL.catalogDialect()) + .as("MySQL uses its own catalog dialect.") + .isInstanceOf(MysqlCatalogDialect.class); + assertThat(DatabasePlatform.MARIADB.catalogDialect()) + .as("MariaDB reuses the MySQL catalog dialect.") + .isInstanceOf(MysqlCatalogDialect.class); + assertThat(DatabasePlatform.H2.catalogDialect()) + .as("H2 uses its own catalog dialect.") + 
.isInstanceOf(H2CatalogDialect.class); + } + private static DataSource dataSourceReporting(final String productName) throws SQLException { final DatabaseMetaData metaData = mock(DatabaseMetaData.class); diff --git a/src/test/java/io/github/databaseaudits/platform/H2CatalogDialectIT.java b/src/test/java/io/github/databaseaudits/platform/H2CatalogDialectIT.java new file mode 100644 index 0000000..2b82e9a --- /dev/null +++ b/src/test/java/io/github/databaseaudits/platform/H2CatalogDialectIT.java @@ -0,0 +1,75 @@ +package io.github.databaseaudits.platform; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.sql.Connection; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Set; + +import org.h2.jdbcx.JdbcDataSource; +import org.junit.jupiter.api.Test; + +import io.github.databaseaudits.audit.catalog.ForeignKeyTypeMatchAudit; +import io.github.databaseaudits.jdbc.CatalogQueries; + +/** + * Verifies {@link H2CatalogDialect#foreignKeyColumnTypesSql()} renders the + * precision and scale of decimal-family columns, so + * {@link ForeignKeyTypeMatchAudit} catches a {@code DECIMAL(10,2)} foreign key + * that references a {@code DECIMAL(5,0)} key on H2 — the mismatch PostgreSQL's + * {@code format_type} and MySQL's {@code column_type} already catch. A bare + * {@code data_type} would collapse both to {@code NUMERIC} and miss it. Runs + * against embedded H2, so unlike the container-backed {@code CatalogAuditsIT} it + * needs no Docker. 
+ */ +class H2CatalogDialectIT { + @Test + void testForeignKeyTypeMatchAudit_DecimalPrecisionMismatchOnH2_ReportsBothTypesWithPrecisionAndScale() + throws SQLException { + final JdbcDataSource dataSource = h2DataSource("fk_decimal_mismatch"); + createForeignKey(dataSource, "DECIMAL(5,0)", "DECIMAL(10,2)"); + final ForeignKeyTypeMatchAudit audit = new ForeignKeyTypeMatchAudit( + new CatalogQueries(dataSource), DatabasePlatform.H2); + + assertThat(audit.audit("PUBLIC", Set.of())) + .as("A DECIMAL(10,2) FK referencing a DECIMAL(5,0) key is a mismatch on H2, rendered with precision and scale.") + .anySatisfy(violation -> assertThat(violation) + .contains("CHILD.PARENT_REF") + .contains("NUMERIC(10,2)") + .contains("NUMERIC(5,0)")); + } + + @Test + void testForeignKeyTypeMatchAudit_EqualDecimalPrecisionOnH2_ReportsNoMismatch() + throws SQLException { + final JdbcDataSource dataSource = h2DataSource("fk_decimal_match"); + createForeignKey(dataSource, "DECIMAL(10,2)", "DECIMAL(10,2)"); + final ForeignKeyTypeMatchAudit audit = new ForeignKeyTypeMatchAudit( + new CatalogQueries(dataSource), DatabasePlatform.H2); + + assertThat(audit.audit("PUBLIC", Set.of())) + .as("Equal DECIMAL(10,2) types on both sides of the FK are not a mismatch.") + .isEmpty(); + } + + private static JdbcDataSource h2DataSource(final String name) { + final JdbcDataSource dataSource = new JdbcDataSource(); + dataSource.setURL("jdbc:h2:mem:" + name + ";DB_CLOSE_DELAY=-1"); + return dataSource; + } + + private static void createForeignKey(final JdbcDataSource dataSource, + final String parentKeyType, final String childColumnType) + throws SQLException { + try (Connection connection = dataSource.getConnection(); + Statement statement = connection.createStatement()) { + statement.execute( + "CREATE TABLE parent (id " + parentKeyType + " PRIMARY KEY)"); + statement.execute("CREATE TABLE child (" + + "id BIGINT PRIMARY KEY, " + + "parent_ref " + childColumnType + " NOT NULL, " + + "CONSTRAINT fk_child_parent 
FOREIGN KEY (parent_ref) REFERENCES parent(id))"); + } + } +}