diff --git a/.dockerignore b/.dockerignore
index 9b49ae35..6e8cbf5f 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -17,3 +17,4 @@ venv/
.ruff_cache/
deploy
!deploy/install_*.sh
+!deploy/generate_third_party_notices.py
diff --git a/README.md b/README.md
index 670c5ad9..ef49135b 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
# DataOps Data Quality TestGen
-  [](https://hub.docker.com/r/datakitchen/dataops-testgen) [](https://hub.docker.com/r/datakitchen/dataops-testgen) [](https://docs.datakitchen.io/articles/dataops-testgen-help/dataops-testgen-help) [](https://data-observability-slack.datakitchen.io/join)
+  [](https://hub.docker.com/r/datakitchen/dataops-testgen) [](https://hub.docker.com/r/datakitchen/dataops-testgen) [](https://docs.datakitchen.io/testgen/what-is-testgen/) [](https://data-observability-slack.datakitchen.io/join)
*
DataOps Data Quality TestGen, or "TestGen" for short, can help you find data issues so you can alert your users and notify your suppliers. It does this by delivering simple, fast data quality test generation and execution by data profiling, new dataset screening and hygiene review, algorithmic generation of data quality validation tests, ongoing production testing of new data refreshes, and continuous anomaly monitoring of datasets. TestGen is part of DataKitchen's Open Source Data Observability.
*
@@ -7,7 +7,7 @@
[DataOps TestGen Overview](https://datakitchen.io/dataops-testgen-product/)
-[DataOps TestGen Documentation](https://docs.datakitchen.io/articles/dataops-testgen-help/dataops-testgen-help)
+[DataOps TestGen Documentation](https://docs.datakitchen.io/testgen/what-is-testgen/)
## Features
@@ -68,7 +68,7 @@ Once the installation completes, verify that you can login to the UI with the UR
### Optional: Run the TestGen demo setup
-The [Data Observability quickstart](https://docs.datakitchen.io/articles/open-source-data-observability/data-observability-overview) walks you through DataOps Data Quality TestGen capabilities to demonstrate how it covers critical use cases for data and analytic teams.
+The [Data Observability quickstart](https://docs.datakitchen.io/tutorials/quickstart-demo/) walks you through DataOps Data Quality TestGen capabilities to demonstrate how it covers critical use cases for data and analytic teams.
```shell
python3 dk-installer.py tg run-demo
@@ -110,7 +110,7 @@ Within the virtual environment, install the TestGen package with pip.
pip install dataops-testgen
```
-Verify that the [_testgen_ command line](https://docs.datakitchen.io/articles/dataops-testgen-help/testgen-commands-and-details) works.
+Verify that the [_testgen_ command line](https://docs.datakitchen.io/testgen/cli-reference/) works.
```shell
testgen --help
```
@@ -165,7 +165,7 @@ Verify that you can login to the UI with the `TESTGEN_USERNAME` and `TESTGEN_PAS
### Optional: Run the TestGen demo setup
-The [Data Observability quickstart](https://docs.datakitchen.io/articles/open-source-data-observability/data-observability-overview) walks you through DataOps Data Quality TestGen capabilities to demonstrate how it covers critical use cases for data and analytic teams.
+The [Data Observability quickstart](https://docs.datakitchen.io/tutorials/quickstart-demo/) walks you through DataOps Data Quality TestGen capabilities to demonstrate how it covers critical use cases for data and analytic teams.
```shell
testgen quick-start
@@ -187,7 +187,7 @@ python3 dk-installer.py tg delete-demo
### Upgrade to latest version
-New releases of TestGen are announced on the `#releases` channel on [Data Observability Slack](https://data-observability-slack.datakitchen.io/join), and release notes can be found on the [DataKitchen documentation portal](https://docs.datakitchen.io/articles/dataops-testgen-help/testgen-release-notes/a/h1_1691719522). Use the following command to upgrade to the latest released version.
+New releases of TestGen are announced on the `#releases` channel on [Data Observability Slack](https://data-observability-slack.datakitchen.io/join), and release notes can be found on the [DataKitchen documentation portal](https://docs.datakitchen.io/testgen/release-notes/). Use the following command to upgrade to the latest released version.
```shell
python3 dk-installer.py tg upgrade
@@ -203,7 +203,7 @@ python3 dk-installer.py tg delete
### Access the _testgen_ CLI
-The [_testgen_ command line](https://docs.datakitchen.io/articles/dataops-testgen-help/testgen-commands-and-details) can be accessed within the running container.
+The [_testgen_ command line](https://docs.datakitchen.io/testgen/cli-reference/) can be accessed within the running container.
```shell
docker compose exec engine bash
@@ -226,13 +226,13 @@ docker compose up -d
## What Next?
### Getting started guide
-We recommend you start by going through the [Data Observability Overview Demo](https://docs.datakitchen.io/articles/open-source-data-observability/data-observability-overview).
+We recommend you start by going through the [Data Observability Overview Demo](https://docs.datakitchen.io/tutorials/quickstart-demo/).
### Support
For support requests, [join the Data Observability Slack](https://data-observability-slack.datakitchen.io/join) 👋 and post on the `#support` channel.
### Connect to your database
-Follow [these instructions](https://docs.datakitchen.io/articles/dataops-testgen-help/connect-your-database) to improve the quality of data in your database.
+Follow [these instructions](https://docs.datakitchen.io/testgen/connect-your-database/) to improve the quality of data in your database.
### Community
Talk and learn with other data practitioners who are building with DataKitchen. Share knowledge, get help, and contribute to our open-source project.
diff --git a/deploy/generate_third_party_notices.py b/deploy/generate_third_party_notices.py
new file mode 100644
index 00000000..8930e591
--- /dev/null
+++ b/deploy/generate_third_party_notices.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python3
+"""Generate THIRD-PARTY-NOTICES from installed Python packages.
+
+Runs pip-licenses to collect metadata, filters out dev/internal packages,
+and outputs a formatted notices file with summary table and per-package details.
+
+Usage:
+ python generate_third_party_notices.py [--output PATH]
+"""
+
+import argparse
+import json
+import re
+import subprocess
+import sys
+from datetime import date
+from pathlib import Path
+
# Packages installed temporarily during the Docker build to run this script —
# never listed in pyproject.toml, so they must be excluded here explicitly.
_BUILD_ONLY = {"pip-licenses", "prettytable"}

# Internal DK packages not discoverable from pyproject.toml structure.
# Both spellings are listed because metadata may report either form.
_EXTRA_INTERNAL = {"requests-extensions", "requests_extensions"}

# Packages whose license is reported as UNKNOWN by pip-licenses (keys are
# normalized per PEP 503 — see _normalize; consumed by normalize_license).
LICENSE_OVERRIDES = {
    "google-crc32c": "Apache-2.0",
    "streamlit-camera-input-live": "MIT",
    "streamlit-embedcode": "MIT",
    "streamlit-keyup": "MIT",
    "streamlit-toggle-switch": "MIT",
    "streamlit-vertical-slider": "MIT",
    "streamlit-faker": "Apache-2.0",
}
+
+
+def _normalize(name: str) -> str:
+ """Normalize package name per PEP 503 (lowercase, hyphens/underscores/dots → hyphen)."""
+ return re.sub(r"[-_.]+", "-", name).lower()
+
+
def _parse_pkg_name(requirement: str) -> str:
    """Return the normalized distribution name from a PEP 508 requirement string.

    The name is the prefix up to the first version/extras/marker/url/space
    delimiter, e.g. ``mcp[cli]==1.26.0`` -> ``mcp``.
    """
    # Longest run of non-delimiter characters == prefix before the first delimiter.
    head = re.match(r"[^><=!~\[;@\s]*", requirement).group(0)
    return _normalize(head.strip())
+
+
+def _load_pyproject(path: Path) -> dict:
+ if sys.version_info >= (3, 11):
+ import tomllib
+ else:
+ import tomli as tomllib # type: ignore[no-redef]
+ with open(path, "rb") as f:
+ return tomllib.load(f)
+
+
+def _find_pyprojects(repo_root: Path) -> list[Path]:
+ """Return pyproject.toml paths for root, submodule, and plugins."""
+ candidates = [repo_root / "pyproject.toml", repo_root / "testgen" / "pyproject.toml"]
+ for plugins_dir in [repo_root / "plugins", repo_root / "testgen" / "plugins"]:
+ if plugins_dir.is_dir():
+ candidates.extend(sorted(plugins_dir.glob("*/pyproject.toml")))
+ return [p for p in candidates if p.exists()]
+
+
def _resolve_transitive(names: set[str]) -> set[str]:
    """Expand package names to the normalized set of all transitive dependencies.

    Requirements guarded by an ``extra == ...`` marker are skipped (optional
    features), but requirements carrying other environment markers — e.g.
    ``tomli; python_version < "3.11"`` — are kept: they may be installed in
    the current environment, and dropping them can misclassify a shared
    runtime dependency as dev-only and omit it from the notices.

    Args:
        names: Seed package names (any spelling; normalized internally).

    Returns:
        Normalized names of the seeds plus everything reachable from them.
        Packages that are not installed stay in the result but are not
        expanded further.
    """
    from importlib.metadata import PackageNotFoundError, requires

    resolved: set[str] = set()
    queue = list(names)
    while queue:
        name = queue.pop()
        norm = _normalize(name)
        if norm in resolved:
            continue
        resolved.add(norm)
        try:
            reqs = requires(name) or []
        except PackageNotFoundError:
            # Retry under the normalized spelling before giving up.
            try:
                reqs = requires(norm) or []
            except PackageNotFoundError:
                continue  # not installed — keep the name, nothing to expand
        for req in reqs:
            # Skip only optional-feature requirements; keep env-conditional ones.
            marker = req.partition(";")[2]
            if re.search(r"\bextra\s*==", marker):
                continue
            dep_name = _parse_pkg_name(req)
            if dep_name and dep_name not in resolved:
                queue.append(dep_name)
    return resolved
+
+
def _build_exclude_sets(repo_root: Path) -> tuple[set[str], set[str]]:
    """Derive the (dev-only, internal) package-name sets from pyproject files.

    Internal packages are the project names declared by each discovered
    pyproject.toml plus the hard-coded extras.  Dev-only packages are the
    optional-dependency groups expanded transitively, minus anything also
    reachable from the runtime packages — that keeps shared dependencies
    (e.g. requests, urllib3) in the runtime set.
    """
    dev_direct = set(_BUILD_ONLY)
    internal = set(_EXTRA_INTERNAL)

    for pyproject in _find_pyprojects(repo_root):
        project = _load_pyproject(pyproject).get("project", {})

        if project.get("name"):
            internal.add(project["name"])

        for group in project.get("optional-dependencies", {}).values():
            dev_direct.update(_parse_pkg_name(dep) for dep in group)

    # Expand both sides transitively; subtracting runtime reachability keeps
    # shared dependencies out of the exclusion set.
    dev_only = _resolve_transitive(dev_direct) - _resolve_transitive(internal)
    return dev_only, internal
+
+
+def _find_repo_root() -> Path:
+ """Walk up from this script to find the repo root (contains pyproject.toml with 'testgen' subdir)."""
+ # Script lives at /testgen/deploy/ or is called from repo root
+ script_dir = Path(__file__).resolve().parent
+ for candidate in [script_dir.parent.parent, script_dir.parent, Path.cwd()]:
+ if (candidate / "pyproject.toml").exists() and (candidate / "testgen" / "pyproject.toml").exists():
+ return candidate
+ # Fallback: just use empty sets (Docker build context may not have root pyproject.toml)
+ return script_dir
+
+
def normalize_license(name: str, lic: str) -> str:
    """Return a cleaned-up license identifier for a package.

    Known-misreported packages are mapped through LICENSE_OVERRIDES; empty
    or UNKNOWN values stay ``"UNKNOWN"``; long Apache boilerplate strings
    collapse to the SPDX id ``Apache-2.0``.
    """
    override = LICENSE_OVERRIDES.get(_normalize(name))
    if override is not None:
        return override
    if not lic or lic == "UNKNOWN":
        return "UNKNOWN"
    if len(lic) > 50 and "Apache" in lic:
        return "Apache-2.0"
    return lic
+
+
+def extract_copyright(license_text: str) -> str | None:
+ if not license_text:
+ return None
+ lines: list[str] = []
+ seen: set[str] = set()
+ for line in license_text.split("\n"):
+ stripped = line.strip()
+ if re.match(r"(?i)copyright\s", stripped) and stripped not in seen:
+ lines.append(stripped)
+ seen.add(stripped)
+ return "\n".join(lines) if lines else None
+
+
def get_packages() -> list[dict]:
    """Run pip-licenses and return its JSON package list.

    Each entry carries name, version, license, URL, and the license/notice
    file texts.  Raises CalledProcessError if pip-licenses exits non-zero.
    """
    command = [
        sys.executable,
        "-m",
        "piplicenses",
        "--format=json",
        "--with-urls",
        "--with-license-file",
        "--with-notice-file",
        "--no-license-path",
    ]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    return json.loads(completed.stdout)
+
+
def generate(packages: list[dict], dev_only: set[str], internal: set[str]) -> str:
    """Render the THIRD-PARTY-NOTICES document for all runtime packages.

    Filters out internal and dev-only packages, then emits a header, a
    summary table (name/version/license), and per-package detailed notices
    with copyright lines, NOTICE files, and license texts.

    Args:
        packages: pip-licenses JSON entries (Name, Version, License, URL,
            LicenseText, NoticeText).
        dev_only: Normalized names of dev-only packages to exclude.
        internal: Normalized names of internal packages to exclude.

    Returns:
        The full notices document as a single newline-joined string.
    """
    runtime = [
        pkg for pkg in packages
        if _normalize(pkg["Name"]) not in internal and _normalize(pkg["Name"]) not in dev_only
    ]
    runtime.sort(key=lambda p: p["Name"].lower())

    lines: list[str] = []

    # Header
    lines.append("THIRD-PARTY SOFTWARE NOTICES AND INFORMATION")
    lines.append("=" * 60)
    lines.append("")
    lines.append("DataOps TestGen Enterprise")
    lines.append(f"Copyright (c) {date.today().year} DataKitchen, Inc.")
    lines.append("")
    lines.append("This product includes software developed by third parties.")
    lines.append("The following sets forth attribution notices for third-party")
    lines.append("software that may be contained in portions of this product.")
    lines.append("")
    lines.append(f"Generated: {date.today().isoformat()}")
    lines.append(f"Runtime dependencies: {len(runtime)}")
    lines.append("")
    lines.append("")

    # Summary table
    lines.append("-" * 60)
    lines.append("SUMMARY")
    lines.append("-" * 60)
    lines.append("")
    lines.append(f"{'Package':<40s} {'Version':<16s} {'License'}")
    lines.append(f"{'-' * 40} {'-' * 16} {'-' * 30}")
    for pkg in runtime:
        lic = normalize_license(pkg["Name"], pkg["License"])
        lines.append(f"{pkg['Name']:<40s} {pkg['Version']:<16s} {lic}")

    lines.append("")
    lines.append("")

    # Detailed notices
    lines.append("-" * 60)
    lines.append("DETAILED NOTICES")
    lines.append("-" * 60)

    for pkg in runtime:
        name = pkg["Name"]
        version = pkg["Version"]
        lic = normalize_license(name, pkg["License"])
        url = pkg.get("URL", "")
        license_text = pkg.get("LicenseText", "")
        notice_text = pkg.get("NoticeText", "")

        lines.append("")
        lines.append("=" * 60)
        lines.append(f"{name} {version}")
        lines.append(f"License: {lic}")
        if url and url != "UNKNOWN":
            lines.append(f"URL: {url}")
        lines.append("=" * 60)

        copyright_line = extract_copyright(license_text)
        if copyright_line:
            lines.append("")
            lines.append(copyright_line)

        # pip-licenses reports "UNKNOWN" for missing files — treat as absent.
        if notice_text and notice_text.strip() and notice_text.strip() != "UNKNOWN":
            lines.append("")
            lines.append("NOTICE:")
            lines.append(notice_text.strip())

        if license_text and license_text.strip() and license_text.strip() != "UNKNOWN":
            text = license_text.strip()
            # Abbreviate long Apache 2.0 boilerplate to the standard short form
            if len(text) > 3000 and "apache" in text.lower():
                lines.append("")
                lines.append("Licensed under the Apache License, Version 2.0.")
                lines.append("You may obtain a copy of the License at")
                lines.append("")
                lines.append(" http://www.apache.org/licenses/LICENSE-2.0")
                lines.append("")
                lines.append("Unless required by applicable law or agreed to in writing,")
                lines.append("software distributed under the License is distributed on an")
                lines.append('"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND.')
            else:
                lines.append("")
                lines.append(text)

    lines.append("")
    return "\n".join(lines)
+
+
def main() -> None:
    """CLI entry point: build the notices document and emit it.

    Writes to --output when given, otherwise prints to stdout.
    """
    parser = argparse.ArgumentParser(description="Generate THIRD-PARTY-NOTICES")
    parser.add_argument("--output", default=None, help="Output file path (default: stdout)")
    options = parser.parse_args()

    dev_only, internal = _build_exclude_sets(_find_repo_root())
    document = generate(get_packages(), dev_only, internal)

    if options.output:
        Path(options.output).write_text(document)
    else:
        print(document)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/deploy/testgen-base.dockerfile b/deploy/testgen-base.dockerfile
index d758a03f..0a297555 100644
--- a/deploy/testgen-base.dockerfile
+++ b/deploy/testgen-base.dockerfile
@@ -20,6 +20,8 @@ RUN apk update && apk upgrade && apk add --no-cache \
gpg \
gpgv \
openssl \
+ # glibc compatibility layer for packages that only ship manylinux wheels (e.g. hdbcli)
+ gcompat \
# Additional libraries needed and their dev counterparts. We add both so that we can remove
# the *-dev later, keeping the libraries
openblas=0.3.30-r2 \
@@ -39,9 +41,26 @@ COPY ./pyproject.toml /tmp/dk/pyproject.toml
RUN mkdir /dk
# Upgrading pip for security
-RUN python3 -m pip install --upgrade pip==26.0
+RUN python3 -m pip install --no-cache-dir --upgrade pip==26.0
-RUN python3 -m pip install --prefix=/dk /tmp/dk
+# hdbcli only ships manylinux wheels (no musl). pip 26+ correctly rejects these on Alpine.
+# We download the wheel for the correct arch, then extract it directly into site-packages
+# (wheels are zip files). gcompat provides the glibc shim needed at runtime.
+RUN ARCH=$(uname -m) && \
+ pip download --platform manylinux2014_${ARCH} --python-version 3.12 --only-binary :all: \
+ --no-deps -d /tmp/wheels hdbcli==2.25.31 && \
+ python3 -m zipfile -e /tmp/wheels/hdbcli-*.whl /dk/lib/python3.12/site-packages/ && \
+ # Copy dist-info to system site-packages so pip sees hdbcli as installed during
+ # dependency resolution (sqlalchemy-hana transitively depends on hdbcli~=2.10)
+ cp -r /dk/lib/python3.12/site-packages/hdbcli-*.dist-info \
+ "$(python3 -c 'import sysconfig; print(sysconfig.get_path("purelib"))')"/ && \
+ rm -rf /tmp/wheels
+
+# Strip hdbcli from pyproject.toml before installing — it's already extracted above and
+# pip 26+ would fail trying to resolve it from PyPI on musl
+RUN sed -i '/hdbcli/d' /tmp/dk/pyproject.toml
+
+RUN python3 -m pip install --no-cache-dir --prefix=/dk /tmp/dk
RUN apk del \
gcc \
@@ -59,4 +78,4 @@ RUN apk del \
unixodbc-dev \
apache-arrow-dev
-RUN rm /tmp/dk/install_linuxodbc.sh
+RUN rm -rf /root/.cache/pip /tmp/dk/install_linuxodbc.sh
diff --git a/deploy/testgen.dockerfile b/deploy/testgen.dockerfile
index 5c4bb933..6708fd67 100644
--- a/deploy/testgen.dockerfile
+++ b/deploy/testgen.dockerfile
@@ -1,4 +1,4 @@
-ARG TESTGEN_BASE_LABEL=v11
+ARG TESTGEN_BASE_LABEL=v14
FROM datakitchen/dataops-testgen-base:${TESTGEN_BASE_LABEL} AS release-image
@@ -12,10 +12,18 @@ ENV PATH=$PATH:/dk/bin
RUN apk upgrade
-# Now install everything
+# Now install everything (hdbcli is pre-installed in the base image via manual wheel extraction)
COPY . /tmp/dk/
-RUN python3 -m pip install --prefix=/dk /tmp/dk
-RUN rm -Rf /tmp/dk
+RUN sed -i '/hdbcli/d' /tmp/dk/pyproject.toml /tmp/dk/testgen/pyproject.toml 2>/dev/null; \
+ python3 -m pip install --no-cache-dir --prefix=/dk /tmp/dk
+
+# Generate third-party license notices from installed packages
+RUN pip install --no-cache-dir pip-licenses \
+ && SCRIPT=$(find /tmp/dk -name generate_third_party_notices.py | head -1) \
+ && PYTHONPATH=/dk/lib/python3.12/site-packages python3 "$SCRIPT" --output /dk/THIRD-PARTY-NOTICES \
+ && pip uninstall -y pip-licenses
+
+RUN rm -Rf /tmp/dk /root/.cache/pip
RUN tg-patch-streamlit
diff --git a/pyproject.toml b/pyproject.toml
index bcea6dd2..9cc59ed5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "dataops-testgen"
-version = "5.0.2"
+version = "5.9.4"
description = "DataKitchen's Data Quality DataOps TestGen"
authors = [
{ "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },
@@ -32,8 +32,12 @@ dependencies = [
"click==8.1.3",
"sqlalchemy==1.4.46",
"databricks-sql-connector==2.9.3",
- "snowflake-sqlalchemy==1.6.1",
+ "databricks-sdk>=0.20.0",
+ "snowflake-sqlalchemy==1.9.0",
"sqlalchemy-bigquery==1.14.1",
+ "oracledb==3.4.0",
+ "hdbcli==2.25.31",
+ "sqlalchemy-hana==2.1.0",
"pyodbc==5.0.0",
"psycopg2-binary==2.9.9",
"pycryptodome==3.21",
@@ -53,11 +57,9 @@ dependencies = [
"xlsxwriter==3.2.0",
"psutil==5.9.8",
"concurrent_log_handler==0.9.25",
- "cryptography==44.0.1",
+ "cryptography==46.0.5",
"validators==0.33.0",
"reportlab==4.2.2",
- "pydantic==1.10.13",
- "streamlit-pydantic==0.6.0",
"cron-converter==1.2.1",
"cron-descriptor==2.0.5",
"pybars3==0.9.7",
@@ -70,6 +72,18 @@ dependencies = [
"matplotlib==3.9.2",
"scipy==1.14.1",
"jinja2==3.1.6",
+ "pillow==12.1.1",
+ "protobuf==6.33.5",
+
+ # MCP server
+ "mcp[cli]==1.26.0",
+ "uvicorn==0.41.0",
+ "PyJWT==2.12.0",
+ "bcrypt==5.0.0",
+
+ # API & OAuth server
+ "authlib~=1.6.6",
+ "fastapi==0.135.1",
]
[project.optional-dependencies]
@@ -99,8 +113,8 @@ tg-patch-streamlit = "testgen.ui.scripts.patch_streamlit:patch"
[project.urls]
"Source Code" = "https://github.com/DataKitchen/dataops-testgen"
"Bug Tracker" = "https://github.com/DataKitchen/dataops-testgen/issues"
-"Documentation" = "https://docs.datakitchen.io/articles/dataops-testgen-help/dataops-testgen-help"
-"Release Notes" = "https://docs.datakitchen.io/articles/dataops-testgen-help/testgen-release-notes"
+"Documentation" = "https://docs.datakitchen.io/testgen/what-is-testgen/"
+"Release Notes" = "https://docs.datakitchen.io/testgen/release-notes/"
"Slack" = "https://data-observability-slack.datakitchen.io/join"
"Homepage" = "https://example.com"
@@ -211,6 +225,7 @@ exclude = [
"_build",
"build",
"dist",
+ "deploy",
]
[tool.ruff.lint]
@@ -294,3 +309,7 @@ asset_dir = "ui/components/frontend/js"
[[tool.streamlit.component.components]]
name = "edit_table_monitors"
asset_dir = "ui/components/frontend/js"
+
+[[tool.streamlit.component.components]]
+name = "project_settings"
+asset_dir = "ui/components/frontend/standalone/project_settings"
diff --git a/testgen/__main__.py b/testgen/__main__.py
index 62ae21c0..29c6d3b0 100644
--- a/testgen/__main__.py
+++ b/testgen/__main__.py
@@ -56,6 +56,8 @@
LOG = logging.getLogger("testgen")
APP_MODULES = ["ui", "scheduler"]
+if settings.MCP_ENABLED:
+ APP_MODULES.append("mcp")
VERSION_DATA = version_service.get_version()
CHILDREN_POLL_INTERVAL = 10
@@ -72,6 +74,8 @@ class CliGroup(click.Group):
def invoke(self, ctx: Context):
try:
super().invoke(ctx)
+ except click.exceptions.UsageError:
+ raise
except Exception:
LOG.exception("There was an unexpected error")
@@ -764,6 +768,10 @@ def run_app(module):
case "scheduler":
run_scheduler()
+ case "mcp":
+ from testgen.mcp.server import run_mcp
+ run_mcp()
+
case "all":
children = [
subprocess.Popen([sys.executable, sys.argv[0], "run-app", m], start_new_session=True)
@@ -793,5 +801,33 @@ def term_children(signum, _):
@cli.command("mcp-token", help="Generate a JWT token for MCP server authentication.")
@click.option("--username", required=True, help="TestGen username")
# NOTE(review): hide_input only affects prompted input in click; without
# prompt=True it is a no-op here, and passing --password on the command line
# exposes it to shell history and the process list — consider prompt=True.
@click.option("--password", required=True, hide_input=True, help="TestGen password")
@with_database_session
def mcp_token(username: str, password: str):
    """Authenticate against TestGen and print a bearer token plus the MCP URL.

    Exits with status 1 (after printing the error in red) when
    authenticate_user rejects the credentials with a ValueError.
    """
    # Imported lazily so the CLI does not pay MCP import cost on other commands.
    from testgen.mcp import get_server_url
    from testgen.mcp.auth import authenticate_user
    try:
        token = authenticate_user(username, password)
    except ValueError as e:
        click.secho(str(e), fg="red")
        sys.exit(1)

    mcp_url = f"{get_server_url()}/mcp"

    click.echo()
    click.echo(token)
    click.echo()
    click.secho("MCP server URL:", bold=True)
    click.echo(f" {mcp_url}")
    click.echo()
    click.secho("Pass the token as a Bearer header when connecting from any MCP client.", dim=True)
    click.echo()
    click.secho("Example — Claude Code:", bold=True)
    click.echo(f' claude mcp add --transport http testgen {mcp_url} --header "Authorization: Bearer {token}"')
    click.echo()
+
+
if __name__ == "__main__":
cli()
diff --git a/testgen/commands/queries/execute_tests_query.py b/testgen/commands/queries/execute_tests_query.py
index 90d67859..4902cf98 100644
--- a/testgen/commands/queries/execute_tests_query.py
+++ b/testgen/commands/queries/execute_tests_query.py
@@ -78,6 +78,150 @@ class AggregateResult(TypedDict):
result_codes: str
def build_cat_expressions(
    measure: str,
    test_operator: str,
    test_condition: str,
    history_calculation: str,
    lower_tolerance: str,
    upper_tolerance: str,
    varchar_type: str,
    concat_operator: str,
    null_value: str = "",
) -> tuple[str, str]:
    """Build the measure and condition SQL expressions for one CAT test.

    Args:
        measure: Already-resolved measure SQL expression.
        test_operator: Comparison operator (e.g., "=", "BETWEEN").
        test_condition: Already-resolved test condition SQL expression.
        history_calculation: "PREDICT" for prediction mode, anything else for normal.
        lower_tolerance: Lower tolerance (empty/None means PREDICT is still training).
        upper_tolerance: Upper tolerance (empty/None means PREDICT is still training).
        varchar_type: DB-specific varchar type (e.g., "VARCHAR", "STRING").
        concat_operator: DB-specific concat operator (e.g., "||", "+").
        null_value: Sentinel string for NULL values.

    Returns:
        (measure_expression, condition_expression)
    """
    measure_expression = (
        f"COALESCE(CAST({measure} AS {varchar_type}) {concat_operator} '|', '{null_value}|')"
    )

    # A PREDICT test with no tolerances yet is still training: emit the -1 code.
    still_training = history_calculation == "PREDICT" and (
        lower_tolerance in (None, "") or upper_tolerance in (None, "")
    )
    if still_training:
        return measure_expression, "'-1,'"

    # BETWEEN needs surrounding spaces; plain operators are joined tightly.
    if "BETWEEN" in test_operator:
        comparison = f"{measure} {test_operator} {test_condition}"
    else:
        comparison = f"{measure}{test_operator}{test_condition}"
    return measure_expression, f"CASE WHEN {comparison} THEN '0,' ELSE '1,' END"
+
+
def group_cat_tests(
    test_defs: list[TestExecutionDef],
    max_query_chars: int,
    concat_operator: str,
    single: bool = False,
) -> list[list[TestExecutionDef]]:
    """Batch test defs into per-table groups that respect the query size limit.

    Every test def must already carry measure_expression and
    condition_expression.

    Args:
        test_defs: Test defs with expressions already set.
        max_query_chars: Maximum characters allowed per query.
        concat_operator: DB-specific concat operator, counted toward size.
        single: When True, each test def becomes its own group.

    Returns:
        A list of groups, each a list of test defs sharing one table.
    """
    if single:
        # One query per test definition.
        return [[test_def] for test_def in test_defs]

    # Bucket by (schema, table) — a query can only target a single table.
    by_table: dict[tuple[str, str], list[TestExecutionDef]] = {}
    for test_def in test_defs:
        key = (test_def.schema_name, test_def.table_name)
        by_table.setdefault(key, []).append(test_def)

    batches: list[list[TestExecutionDef]] = []
    for table_defs in by_table.values():
        batch: list[TestExecutionDef] = []
        batch_chars = 0

        for test_def in table_defs:
            size = (
                len(test_def.measure_expression)
                + len(test_def.condition_expression)
                + 2 * len(concat_operator)
            )
            # Close the current batch when adding this def would overflow.
            if batch and (batch_chars + size) > max_query_chars:
                batches.append(batch)
                batch = []
                batch_chars = 0

            batch_chars += size
            batch.append(test_def)

        if batch:
            batches.append(batch)

    return batches
+
+
def parse_cat_results(
    aggregate_results: list[AggregateResult],
    aggregate_test_defs: list[list[TestExecutionDef]],
    test_run_id: UUID,
    test_suite_id: UUID | str,
    test_starttime: datetime,
    input_parameters_fn,
    null_value: str = "",
) -> list[list]:
    """Expand aggregate query rows into one result row per test definition.

    Args:
        aggregate_results: Aggregate result dicts fetched from the DB.
        aggregate_test_defs: Test-def groups, indexed by each row's query_index.
        test_run_id: ID of the current test run.
        test_suite_id: ID of the test suite.
        test_starttime: Start time of the test run.
        input_parameters_fn: Maps a TestExecutionDef to its input params string.
        null_value: Sentinel string for NULL values.

    Returns:
        One list of column values per individual test result.
    """
    rows: list[list] = []
    for aggregate in aggregate_results:
        group = aggregate_test_defs[aggregate["query_index"]]
        # Measures/codes are positionally aligned with the group's test defs.
        measures = aggregate["result_measures"].split("|")
        codes = aggregate["result_codes"].split(",")

        for position, test_def in enumerate(group):
            measure = measures[position]
            rows.append([
                test_run_id,
                test_suite_id,
                test_starttime,
                test_def.id,
                test_def.test_type,
                test_def.schema_name,
                test_def.table_name,
                test_def.column_name,
                test_def.skip_errors or 0,
                input_parameters_fn(test_def),
                codes[position],
                None,  # result_status will be calculated later
                None,  # No result_message
                None if measure == null_value else measure,
            ])

    return rows
+
+
class TestExecutionSQL:
null_value = ""
@@ -182,16 +326,21 @@ def _get_params(self, test_def: TestExecutionDef | None = None) -> dict:
# Freshness exclusion params — computed per test at execution time
if test_def.test_type == "Freshness_Trend" and test_def.baseline_sum:
sched = get_schedule_params(test_def.prediction)
- has_exclusions = self._exclude_weekends or sched.excluded_days or sched.window_start is not None
+ # Once the schedule is active (excluded_days derived from active_days),
+ # it supersedes exclude_weekends as the single source of truth for
+ # day exclusion — avoids conflicts where a detection day (e.g. Saturday)
+ # is active per schedule but excluded per exclude_weekends.
+ effective_exclude_weekends = False if sched.excluded_days else self._exclude_weekends
+ has_exclusions = effective_exclude_weekends or sched.excluded_days or sched.window_start is not None
if has_exclusions:
last_update = pd.Timestamp(test_def.baseline_sum)
- excluded = int(count_excluded_minutes(
- last_update, self.run_date, self._exclude_weekends, self._holiday_dates,
+ excluded = round(count_excluded_minutes(
+ last_update, self.run_date, effective_exclude_weekends, self._holiday_dates,
tz=self._schedule_tz, excluded_days=sched.excluded_days,
window_start=sched.window_start, window_end=sched.window_end,
))
is_excl = 1 if is_excluded_day(
- pd.Timestamp(self.run_date), self._exclude_weekends, self._holiday_dates,
+ pd.Timestamp(self.run_date), effective_exclude_weekends, self._holiday_dates,
tz=self._schedule_tz, excluded_days=sched.excluded_days,
window_start=sched.window_start, window_end=sched.window_end,
) else 0
@@ -225,11 +374,15 @@ def _get_query(
query = query.replace(":", "\\:")
return query, None if no_bind else params
-
+
def has_schema_changes(self) -> tuple[dict]:
# Runs on App database
return self._get_query("has_schema_changes.sql")
+ def get_missing_freshness_monitors(self) -> tuple[str, dict]:
+ # Runs on App database
+ return self._get_query("get_missing_freshness_monitors.sql")
+
def get_errored_autogen_monitors(self) -> tuple[str, dict]:
# Runs on App database
return self._get_query("get_errored_autogen_monitors.sql")
@@ -313,69 +466,37 @@ def aggregate_cat_tests(
measure = replace_params(td.measure, params)
measure = replace_templated_functions(measure, self.flavor)
- td.measure_expression = f"COALESCE(CAST({measure} AS {varchar_type}) {concat_operator} '|', '{self.null_value}|')"
+ condition = replace_params(td.test_condition, params)
+ condition = replace_templated_functions(condition, self.flavor)
+
+ td.measure_expression, td.condition_expression = build_cat_expressions(
+ measure=measure,
+ test_operator=td.test_operator,
+ test_condition=condition,
+ history_calculation=td.history_calculation,
+ lower_tolerance=td.lower_tolerance,
+ upper_tolerance=td.upper_tolerance,
+ varchar_type=varchar_type,
+ concat_operator=concat_operator,
+ null_value=self.null_value,
+ )
- # For prediction mode, return -1 during training period
- if td.history_calculation == "PREDICT" and (td.lower_tolerance in (None, "") or td.upper_tolerance in (None, "")):
- td.condition_expression = "'-1,'"
- else:
- condition = (
- f"{td.measure} {td.test_operator} {td.test_condition}"
- if "BETWEEN" in td.test_operator
- else f"{td.measure}{td.test_operator}{td.test_condition}"
- )
- condition = replace_params(condition, params)
- condition = replace_templated_functions(condition, self.flavor)
- td.condition_expression = f"CASE WHEN {condition} THEN '0,' ELSE '1,' END"
+ max_query_chars = self.connection.max_query_chars - 400
+ groups = group_cat_tests(test_defs, max_query_chars, concat_operator, single)
aggregate_queries: list[tuple[str, None]] = []
aggregate_test_defs: list[list[TestExecutionDef]] = []
-
- def add_query(test_defs: list[TestExecutionDef]) -> str:
- if not test_defs:
- return
-
+ for group in groups:
query = (
f"SELECT {len(aggregate_queries)} AS query_index, "
- f"{concat_operator.join([td.measure_expression for td in test_defs])} AS result_measures, "
- f"{concat_operator.join([td.condition_expression for td in test_defs])} AS result_codes "
- f"FROM {quote}{test_defs[0].schema_name}{quote}.{quote}{test_defs[0].table_name}{quote}"
+ f"{concat_operator.join([td.measure_expression for td in group])} AS result_measures, "
+ f"{concat_operator.join([td.condition_expression for td in group])} AS result_codes "
+ f"FROM {quote}{group[0].schema_name}{quote}.{quote}{group[0].table_name}{quote}"
)
query = query.replace(":", "\\:")
aggregate_queries.append((query, None))
- aggregate_test_defs.append(test_defs)
-
- if single:
- for td in test_defs:
- # Add separate query for each test
- add_query([td])
- else:
- test_defs_by_table: dict[tuple[str, str], list[TestExecutionDef]] = {}
- for td in test_defs:
- table = (td.schema_name, td.table_name)
- if not test_defs_by_table.get(table):
- test_defs_by_table[table] = []
- test_defs_by_table[table].append(td)
-
- max_query_chars = self.connection.max_query_chars - 400
- for test_defs in test_defs_by_table.values():
- # Add new query for each table
- current_chars = 0
- current_test_defs = []
-
- for td in test_defs:
- td_chars = len(td.measure_expression) + len(td.condition_expression) + 2 * len(concat_operator)
- # Add new query if current query will become bigger than character limit
- if (current_chars + td_chars) > max_query_chars:
- add_query(current_test_defs)
- current_chars = 0
- current_test_defs = []
-
- current_chars += td_chars
- current_test_defs.append(td)
-
- add_query(current_test_defs)
+ aggregate_test_defs.append(group)
return aggregate_queries, aggregate_test_defs
@@ -384,31 +505,15 @@ def get_cat_test_results(
aggregate_results: list[AggregateResult],
aggregate_test_defs: list[list[TestExecutionDef]],
) -> list[list[UUID | str | datetime | int | None]]:
- test_results: list[list[UUID | str | datetime | int | None]] = []
- for result in aggregate_results:
- test_defs = aggregate_test_defs[result["query_index"]]
- result_measures = result["result_measures"].split("|")
- result_codes = result["result_codes"].split(",")
-
- for index, td in enumerate(test_defs):
- test_results.append([
- self.test_run.id,
- self.test_run.test_suite_id,
- self.test_run.test_starttime,
- td.id,
- td.test_type,
- td.schema_name,
- td.table_name,
- td.column_name,
- td.skip_errors or 0,
- self._get_input_parameters(td),
- result_codes[index],
- None, # result_status will be calculated later
- None, # No result_message
- result_measures[index] if result_measures[index] != self.null_value else None,
- ])
-
- return test_results
+ return parse_cat_results(
+ aggregate_results=aggregate_results,
+ aggregate_test_defs=aggregate_test_defs,
+ test_run_id=self.test_run.id,
+ test_suite_id=self.test_run.test_suite_id,
+ test_starttime=self.test_run.test_starttime,
+ input_parameters_fn=self._get_input_parameters,
+ null_value=self.null_value,
+ )
def update_test_results(self) -> list[tuple[str, dict]]:
# Runs on App database
diff --git a/testgen/commands/queries/profiling_query.py b/testgen/commands/queries/profiling_query.py
index c1ec78fe..95c60433 100644
--- a/testgen/commands/queries/profiling_query.py
+++ b/testgen/commands/queries/profiling_query.py
@@ -2,7 +2,7 @@
from uuid import UUID
from testgen.commands.queries.refresh_data_chars_query import ColumnChars
-from testgen.common import read_template_sql_file, read_template_yaml_file
+from testgen.common import read_template_sql_file
from testgen.common.database.database_service import process_conditionals, replace_params
from testgen.common.models.connection import Connection
from testgen.common.models.profiling_run import ProfilingRun
@@ -19,6 +19,40 @@ class TableSampling:
sample_percent: float
+def calculate_sampling_params(
+ table_name: str,
+ record_count: int,
+ sample_percent_raw: str | float,
+ min_sample: int,
+ max_sample: int = 999000,
+) -> TableSampling | None:
+ """Calculate sampling parameters for a table based on record count and sample percent.
+
+ Returns None if sampling is not applicable (invalid percent, or record_count <= min_sample).
+ """
+ if isinstance(sample_percent_raw, str):
+ cleaned = sample_percent_raw.replace(".", "", 1) if sample_percent_raw else ""
+ sample_percent = float(sample_percent_raw) if cleaned.isdigit() else 30
+ else:
+ sample_percent = float(sample_percent_raw) if sample_percent_raw is not None else 30
+
+ if not (0 < sample_percent < 100):
+ return None
+
+ if record_count <= min_sample:
+ return None
+
+ calc_sample = round(sample_percent * record_count / 100)
+ sample_count = min(max(calc_sample, min_sample), max_sample)
+
+ return TableSampling(
+ table_name=table_name,
+ sample_count=sample_count,
+ sample_ratio=record_count / sample_count,
+ sample_percent=round(100 * sample_count / record_count, 4),
+ )
+
+
@dataclasses.dataclass
class HygieneIssueType:
id: str
@@ -60,7 +94,6 @@ def __init__(self, connection: Connection, table_group: TableGroup, profiling_ru
self.profiling_run = profiling_run
self.run_date = profiling_run.profiling_starttime
self.flavor = connection.sql_flavor
- self._profiling_template: dict = None
def _get_params(self, column_chars: ColumnChars | None = None, table_sampling: TableSampling | None = None) -> dict:
params = {
@@ -116,14 +149,6 @@ def _get_query(
return query, params
- def _get_profiling_template(self) -> dict:
- if not self._profiling_template:
- self._profiling_template = read_template_yaml_file(
- "project_profiling_query.yaml",
- sub_directory=f"flavors/{self.flavor}/profiling",
- )
- return self._profiling_template
-
def get_frequency_analysis_columns(self) -> tuple[str, dict]:
# Runs on App database
return self._get_query("secondary_profiling_columns.sql")
@@ -142,8 +167,10 @@ def update_profiling_results(self) -> list[tuple[str, dict]]:
self._get_query("functional_datatype.sql"),
self._get_query("functional_tabletype_stage.sql"),
self._get_query("functional_tabletype_update.sql"),
- self._get_query("pii_flag.sql"),
]
+ if self.table_group.profile_flag_pii:
+ queries.append(self._get_query("pii_flag.sql"))
+ queries.append(self._get_query("pii_flag_update.sql"))
if self.table_group.profile_flag_cdes:
queries.append(self._get_query("cde_flagger_query.sql"))
return queries
@@ -194,42 +221,33 @@ def update_hygiene_issue_prevalence(self, issue_type: HygieneIssueType) -> tuple
def run_column_profiling(self, column_chars: ColumnChars, table_sampling: TableSampling | None = None) -> tuple[str, dict]:
# Runs on Target database
- template = self._get_profiling_template()
general_type = column_chars.general_type
+ do_sample = bool(table_sampling)
- query = ""
- query += template["01_sampling" if table_sampling else "01_else"]
- query += template["01_all"]
- query += template["02_X" if general_type == "X" else "02_else"]
- query += template["03_ADN" if general_type in ["A", "D", "N"] else "03_else"]
-
- if general_type == "A":
- query += template["04_A"]
- elif general_type == "N":
- query += template["04_N"]
- else:
- query += template["04_else"]
-
- query += template["05_A" if general_type == "A" else "05_else"]
- query += template["06_A" if general_type == "A" else "06_else"]
- query += template["08_N" if general_type == "N" else "08_else"]
- query += template["10_N_dec" if general_type == "N" and column_chars.is_decimal == True else "10_else"]
- query += template["11_D" if general_type == "D" else "11_else"]
- query += template["12_B" if general_type == "B" else "12_else"]
- query += template["14_A" if general_type == "A" else "14_else"]
- query += template["16_all"]
- query += template["98_all"]
-
- if general_type == "N":
- query += template["99_N_sampling" if table_sampling else "99_N"]
- else:
- query += template["99_else"]
-
- params = self._get_params(column_chars, table_sampling)
- query = replace_params(query, params)
- query = replace_templated_functions(query, self.flavor)
+ extra_params = {
+ "do_sample": do_sample,
+ "is_type_A": general_type == "A",
+ "is_type_N": general_type == "N",
+ "is_type_D": general_type == "D",
+ "is_type_B": general_type == "B",
+ "is_type_ADN": general_type in ("A", "D", "N"),
+ "is_type_X": general_type == "X",
+ "is_A_sampling": general_type == "A" and do_sample,
+ "is_A_no_sampling": general_type == "A" and not do_sample,
+ "is_N_decimal": general_type == "N" and column_chars.is_decimal,
+ "is_N_sampling": general_type == "N" and do_sample,
+ "is_N_no_sampling": general_type == "N" and not do_sample,
+ "is_not_A": general_type != "A",
+ "is_not_A_not_N": general_type not in ("A", "N"),
+ }
- return query, params
+ return self._get_query(
+ "project_profiling_query.sql",
+ f"flavors/{self.flavor}/profiling",
+ extra_params=extra_params,
+ column_chars=column_chars,
+ table_sampling=table_sampling,
+ )
def get_profiling_errors(self, column_errors: list[tuple[ColumnChars, str]]) -> list[list[str | UUID | int]]:
return [
diff --git a/testgen/commands/queries/refresh_data_chars_query.py b/testgen/commands/queries/refresh_data_chars_query.py
index 1df6e994..9964a2d4 100644
--- a/testgen/commands/queries/refresh_data_chars_query.py
+++ b/testgen/commands/queries/refresh_data_chars_query.py
@@ -99,7 +99,7 @@ def _get_table_criteria(self) -> str:
"""
return table_criteria
-
+
def get_schema_ddf(self) -> tuple[str, dict]:
# Runs on Target database
return self._get_query(
@@ -107,7 +107,7 @@ def get_schema_ddf(self) -> tuple[str, dict]:
f"flavors/{self.flavor}/data_chars",
extra_params={"TABLE_CRITERIA": self._get_table_criteria()},
)
-
+
def get_row_counts(self, table_names: Iterable[str]) -> list[tuple[str, None]]:
# Runs on Target database
schema = self.table_group.table_group_schema
@@ -118,18 +118,20 @@ def get_row_counts(self, table_names: Iterable[str]) -> list[tuple[str, None]]:
]
chunked_queries = chunk_queries(count_queries, " UNION ALL ", self.connection.max_query_chars)
return [ (query, None) for query in chunked_queries ]
-
+
def verify_access(self, table_name: str) -> tuple[str, None]:
# Runs on Target database
schema = self.table_group.table_group_schema
quote = self.flavor_service.quote_character
- query = (
- f"SELECT 1 FROM {quote}{schema}{quote}.{quote}{table_name}{quote} LIMIT 1"
- if not self.flavor_service.use_top
- else f"SELECT TOP 1 * FROM {quote}{schema}{quote}.{quote}{table_name}{quote}"
- )
+ table_ref = f"{quote}{schema}{quote}.{quote}{table_name}{quote}"
+ if (row_limiting := self.flavor_service.row_limiting_clause) == "top":
+ query = f"SELECT TOP 1 * FROM {table_ref}"
+ elif row_limiting == "fetch":
+ query = f"SELECT 1 FROM {table_ref} FETCH FIRST 1 ROWS ONLY"
+ else:
+ query = f"SELECT 1 FROM {table_ref} LIMIT 1"
return (query, None)
-
+
def get_staging_data_chars(self, data_chars: list[ColumnChars], run_date: datetime) -> list[list[str | bool | int]]:
return [
[
@@ -147,7 +149,7 @@ def get_staging_data_chars(self, data_chars: list[ColumnChars], run_date: dateti
]
for column in data_chars
]
-
+
def update_data_chars(self, run_date: datetime) -> list[tuple[str, dict]]:
# Runs on App database
params = {"RUN_DATE": to_sql_timestamp(run_date)}
diff --git a/testgen/commands/run_profiling.py b/testgen/commands/run_profiling.py
index c97ec695..73f45ce4 100644
--- a/testgen/commands/run_profiling.py
+++ b/testgen/commands/run_profiling.py
@@ -6,7 +6,12 @@
import testgen.common.process_service as process_service
from testgen import settings
-from testgen.commands.queries.profiling_query import HygieneIssueType, ProfilingSQL, TableSampling
+from testgen.commands.queries.profiling_query import (
+ HygieneIssueType,
+ ProfilingSQL,
+ TableSampling,
+ calculate_sampling_params,
+)
from testgen.commands.queries.refresh_data_chars_query import ColumnChars
from testgen.commands.queries.rollup_scores_query import RollupScoresSQL
from testgen.commands.run_refresh_data_chars import run_data_chars_refresh
@@ -21,8 +26,9 @@
)
from testgen.common.database.database_service import ThreadedProgress, empty_cache
from testgen.common.mixpanel_service import MixpanelService
-from testgen.common.models import with_database_session
+from testgen.common.models import get_current_session, with_database_session
from testgen.common.models.connection import Connection
+from testgen.common.models.data_column import DataColumnChars
from testgen.common.models.profiling_run import ProfilingRun
from testgen.common.models.table_group import TableGroup
from testgen.common.models.test_suite import TestSuite
@@ -73,10 +79,15 @@ def run_profiling(table_group_id: str | UUID, username: str | None = None, run_d
profiling_run.init_progress()
profiling_run.set_progress("data_chars", "Running")
profiling_run.save()
+ # This runs in a subprocess — commit after every save so progress is visible
+ # to the UI (separate session) and to execute_db_queries (independent connection).
+ get_current_session().commit()
LOG.info(f"Profiling run: {profiling_run.id}, Table group: {table_group.table_groups_name}, Connection: {connection.connection_name}")
try:
data_chars = run_data_chars_refresh(connection, table_group, profiling_run.profiling_starttime)
+ if table_group.profile_exclude_xde:
+ data_chars = _exclude_xde_columns(data_chars, table_group.id)
distinct_tables = {(column.table_name, column.record_ct) for column in data_chars}
profiling_run.set_progress("data_chars", "Completed")
@@ -104,6 +115,7 @@ def run_profiling(table_group_id: str | UUID, username: str | None = None, run_d
profiling_run.profiling_endtime = datetime.now(UTC) + time_delta
profiling_run.status = "Error"
profiling_run.save()
+ get_current_session().commit()
send_profiling_run_notifications(profiling_run)
else:
@@ -111,6 +123,7 @@ def run_profiling(table_group_id: str | UUID, username: str | None = None, run_d
profiling_run.profiling_endtime = datetime.now(UTC) + time_delta
profiling_run.status = "Complete"
profiling_run.save()
+ get_current_session().commit()
send_profiling_run_notifications(profiling_run)
_rollup_profiling_scores(profiling_run, table_group)
@@ -134,33 +147,42 @@ def run_profiling(table_group_id: str | UUID, username: str | None = None, run_d
"""
+def _exclude_xde_columns(data_chars: list[ColumnChars], table_group_id: UUID) -> list[ColumnChars]:
+ """Filter out columns marked as excluded_data_element in data_column_chars."""
+ xde_columns = DataColumnChars.select_where(
+ DataColumnChars.table_groups_id == table_group_id,
+ DataColumnChars.excluded_data_element.is_(True),
+ )
+ if not xde_columns:
+ return data_chars
+
+ excluded = {(col.table_name, col.column_name) for col in xde_columns}
+ filtered = [col for col in data_chars if (col.table_name, col.column_name) not in excluded]
+ if len(filtered) < len(data_chars):
+ LOG.info(f"Excluding {len(data_chars) - len(filtered)} XDE columns from profiling")
+ return filtered
+
+
def _run_column_profiling(sql_generator: ProfilingSQL, data_chars: list[ColumnChars]) -> None:
profiling_run = sql_generator.profiling_run
profiling_run.set_progress("col_profiling", "Running")
profiling_run.save()
+ get_current_session().commit()
LOG.info(f"Running column profiling queries: {len(data_chars)}")
table_group = sql_generator.table_group
sampling_params: dict[str, TableSampling] = {}
- sample_percent = (
- float(table_group.profile_sample_percent)
- if str(table_group.profile_sample_percent).replace(".", "", 1).isdigit()
- else 30
- )
- if table_group.profile_use_sampling and 0 < sample_percent < 100:
- min_sample = table_group.profile_sample_min_count
- max_sample = 999000
+ if table_group.profile_use_sampling:
for column in data_chars:
- if not sampling_params.get(column.table_name) and column.record_ct > min_sample:
- calc_sample = round(sample_percent * column.record_ct / 100)
- sample_count = min(max(calc_sample, min_sample), max_sample)
-
- sampling_params[column.table_name] = TableSampling(
+ if not sampling_params.get(column.table_name):
+ result = calculate_sampling_params(
table_name=column.table_name,
- sample_count=sample_count,
- sample_ratio=column.record_ct / sample_count,
- sample_percent=round(100 * sample_count / column.record_ct, 4),
+ record_count=column.record_ct,
+ sample_percent_raw=table_group.profile_sample_percent,
+ min_sample=table_group.profile_sample_min_count,
)
+ if result:
+ sampling_params[column.table_name] = result
def update_column_progress(progress: ThreadedProgress) -> None:
profiling_run.set_progress(
@@ -172,6 +194,7 @@ def update_column_progress(progress: ThreadedProgress) -> None:
else None,
)
profiling_run.save()
+ get_current_session().commit()
profiling_results, result_columns, error_data = fetch_from_db_threaded(
[sql_generator.run_column_profiling(column, sampling_params.get(column.table_name)) for column in data_chars],
@@ -219,6 +242,7 @@ def _run_frequency_analysis(sql_generator: ProfilingSQL) -> None:
profiling_run = sql_generator.profiling_run
profiling_run.set_progress("freq_analysis", "Running")
profiling_run.save()
+ get_current_session().commit()
error_data = None
try:
@@ -233,6 +257,7 @@ def update_frequency_progress(progress: ThreadedProgress) -> None:
"freq_analysis", "Running", detail=f"{progress['processed']} of {progress['total']}"
)
profiling_run.save()
+ get_current_session().commit()
frequency_results, result_columns, error_data = fetch_from_db_threaded(
[sql_generator.run_frequency_analysis(ColumnChars(**column)) for column in frequency_columns],
@@ -265,6 +290,7 @@ def _run_hygiene_issue_detection(sql_generator: ProfilingSQL) -> None:
profiling_run = sql_generator.profiling_run
profiling_run.set_progress("hygiene_issues", "Running")
profiling_run.save()
+ get_current_session().commit()
try:
LOG.info("Detecting functional data types and critical data elements")
diff --git a/testgen/commands/run_test_execution.py b/testgen/commands/run_test_execution.py
index a809ad20..06aae744 100644
--- a/testgen/commands/run_test_execution.py
+++ b/testgen/commands/run_test_execution.py
@@ -23,7 +23,7 @@
)
from testgen.common.database.database_service import ThreadedProgress, empty_cache
from testgen.common.mixpanel_service import MixpanelService
-from testgen.common.models import with_database_session
+from testgen.common.models import get_current_session, with_database_session
from testgen.common.models.connection import Connection
from testgen.common.models.table_group import TableGroup
from testgen.common.models.test_run import TestRun
@@ -78,6 +78,10 @@ def run_test_execution(test_suite_id: str | UUID, username: str | None = None, r
test_run.init_progress()
test_run.set_progress("data_chars", "Running")
test_run.save()
+ # This runs in a subprocess — commit after every save so progress is visible
+ # to the UI (separate session) and to execute_db_queries (independent connection).
+ session = get_current_session()
+ session.commit()
try:
LOG.info(f"Test run: {test_run.id}, Test suite: {test_suite.test_suite}, Table group: {table_group.table_groups_name}, Connection: {connection.connection_name}")
@@ -101,6 +105,7 @@ def run_test_execution(test_suite_id: str | UUID, username: str | None = None, r
LOG.info(f"Active test definitions: {len(test_defs)}")
test_run.set_progress("validation", "Running")
test_run.save()
+ session.commit()
valid_test_defs = run_test_validation(sql_generator, test_defs)
invalid_count = len(test_defs) - len(valid_test_defs)
@@ -134,6 +139,7 @@ def run_test_execution(test_suite_id: str | UUID, username: str | None = None, r
LOG.info("Updating test results and test run")
test_run.save()
+ session.commit()
execute_db_queries(sql_generator.update_test_results())
# Refresh needed because previous query updates the test run too
test_run.refresh()
@@ -145,6 +151,7 @@ def run_test_execution(test_suite_id: str | UUID, username: str | None = None, r
test_run.test_endtime = datetime.now(UTC) + time_delta
test_run.status = "Error"
test_run.save()
+ session.commit()
send_test_run_notifications(test_run)
else:
@@ -152,10 +159,12 @@ def run_test_execution(test_suite_id: str | UUID, username: str | None = None, r
test_run.test_endtime = datetime.now(UTC) + time_delta
test_run.status = "Complete"
test_run.save()
+ session.commit()
LOG.info("Updating latest run for test suite")
test_suite.last_complete_test_run_id = test_run.id
test_suite.save()
+ session.commit()
if not test_suite.is_monitor:
send_test_run_notifications(test_run)
@@ -196,6 +205,13 @@ def _sync_monitor_definitions(sql_generator: TestExecutionSQL) -> None:
# Freshness monitors will be inserted after profiling
run_monitor_generation(test_suite_id, ["Volume_Trend"], mode="insert")
+ # Autogenerate missing freshness monitors if profiling data exists
+ if sql_generator.table_group.last_complete_profile_run_id:
+ missing_monitors = fetch_dict_from_db(*sql_generator.get_missing_freshness_monitors())
+ if missing_monitors:
+ table_names = [row["table_name"] for row in missing_monitors]
+ run_monitor_generation(test_suite_id, ["Freshness_Trend"], mode="insert", table_names=table_names)
+
# Regenerate monitors that errored in previous run
errored_monitors = fetch_dict_from_db(*sql_generator.get_errored_autogen_monitors())
if errored_monitors:
@@ -215,6 +231,7 @@ def _run_tests(
test_run = sql_generator.test_run
test_run.set_progress(run_type, "Running")
test_run.save()
+ get_current_session().commit()
def update_test_progress(progress: ThreadedProgress) -> None:
test_run.set_progress(
@@ -226,6 +243,7 @@ def update_test_progress(progress: ThreadedProgress) -> None:
else None,
)
test_run.save()
+ get_current_session().commit()
LOG.info(f"Running {run_type} tests: {len(test_defs)}")
test_results, result_columns, error_data = fetch_from_db_threaded(
@@ -265,6 +283,7 @@ def _run_cat_tests(
test_run = sql_generator.test_run
test_run.set_progress("CAT", "Running")
test_run.save()
+ get_current_session().commit()
total_count = len(test_defs)
LOG.info(f"Aggregating CAT tests: {total_count}")
@@ -281,6 +300,7 @@ def update_aggegate_progress(progress: ThreadedProgress) -> None:
else None,
)
test_run.save()
+ get_current_session().commit()
LOG.info(f"Running aggregated CAT test queries: {len(aggregate_queries)}")
aggregate_results, _, aggregate_errors = fetch_from_db_threaded(
@@ -310,6 +330,7 @@ def update_aggegate_progress(progress: ThreadedProgress) -> None:
error="Rerunning errored tests singly",
)
test_run.save()
+ get_current_session().commit()
def update_single_progress(progress: ThreadedProgress) -> None:
test_run.set_progress(
@@ -321,6 +342,7 @@ def update_single_progress(progress: ThreadedProgress) -> None:
),
)
test_run.save()
+ get_current_session().commit()
LOG.info(f"Rerunning errored CAT tests singly: {len(single_test_defs)}")
single_results, _, single_errors = fetch_from_db_threaded(
diff --git a/testgen/commands/run_test_validation.py b/testgen/commands/run_test_validation.py
index 55fb6185..cdb961be 100644
--- a/testgen/commands/run_test_validation.py
+++ b/testgen/commands/run_test_validation.py
@@ -9,20 +9,29 @@
LOG = logging.getLogger("testgen")
-def run_test_validation(sql_generator: TestExecutionSQL, test_defs: list[TestExecutionDef]) -> list[TestExecutionDef]:
- test_defs_by_id: dict[UUID, TestExecutionDef] = {td.id: td for td in test_defs}
+def collect_test_identifiers(
+ test_defs: list[TestExecutionDef],
+ quote_char: str,
+) -> tuple[dict[tuple[str, str, str | None], set[UUID]], set[str], dict[UUID, list[str]]]:
+ """Collect identifiers (schema, table, column) that need validation from test definitions.
+
+ Returns:
+ identifiers_to_check: {(schema, table, column|None): {test_ids}}
+ target_schemas: set of schemas to query
+ errors: {test_id: [error_messages]}
+ """
identifiers_to_check: dict[tuple[str, str, str | None], set[UUID]] = {}
- target_schemas = set()
- quote = sql_generator.flavor_service.quote_character
+ target_schemas: set[str] = set()
+ errors: dict[UUID, list[str]] = {}
def add_identifiers(test_id: UUID, schema: str, table: str, columns: str | None = None, single_column: bool = False) -> None:
target_schemas.add(schema)
if columns:
if single_column:
- identifiers = [(schema.lower(), table.lower(), columns.strip(f" {quote}").lower())]
+ identifiers = [(schema.lower(), table.lower(), columns.strip(f" {quote_char}").lower())]
else:
- column_names = re.split(rf",(?=(?:[^\{quote}]*\{quote}[^\{quote}]*\{quote})*[^\{quote}]*$)", columns)
- column_names = [col.strip(f" {quote}") for col in column_names]
+ column_names = re.split(rf",(?=(?:[^\{quote_char}]*\{quote_char}[^\{quote_char}]*\{quote_char})*[^\{quote_char}]*$)", columns)
+ column_names = [col.strip(f" {quote_char}") for col in column_names]
identifiers = [(schema.lower(), table.lower(), col.lower()) for col in column_names if col]
else:
identifiers = [(schema.lower(), table.lower(), None)]
@@ -32,11 +41,10 @@ def add_identifiers(test_id: UUID, schema: str, table: str, columns: str | None
identifiers_to_check[key] = set()
identifiers_to_check[key].add(test_id)
- def add_test_error(test_ids: list[UUID], error: str) -> None:
- for test_id in test_ids:
- if not test_defs_by_id[test_id].errors:
- test_defs_by_id[test_id].errors.append("Deactivated")
- test_defs_by_id[test_id].errors.append(error)
+ def add_error(test_id: UUID, error: str) -> None:
+ if test_id not in errors:
+ errors[test_id] = ["Deactivated"]
+ errors[test_id].append(error)
for td in test_defs:
# No validation needed for custom query or table group tests
@@ -64,9 +72,50 @@ def add_test_error(test_ids: list[UUID], error: str) -> None:
if td.match_groupby_names:
add_identifiers(td.id, td.match_schema_name, td.match_table_name, td.match_groupby_names)
else:
- add_test_error([td.id], "Invalid test: match schema, table, or column not defined")
+ add_error(td.id, "Invalid test: match schema, table, or column not defined")
+ else:
+ add_error(td.id, "Invalid test: schema, table, or column not defined")
+
+ return identifiers_to_check, target_schemas, errors
+
+
+def check_identifiers(
+ identifiers_to_check: dict[tuple[str, str, str | None], set[UUID]],
+ target_tables: set[tuple[str, str]],
+ target_columns: set[tuple[str, str, str]],
+) -> dict[UUID, list[str]]:
+ """Check collected identifiers against actual target tables/columns.
+
+ Returns {test_id: [error_messages]} for identifiers that don't exist.
+ """
+ errors: dict[UUID, list[str]] = {}
+
+ for identifier, test_ids in identifiers_to_check.items():
+ table = (identifier[0], identifier[1])
+ if table not in target_tables:
+ error = f"Missing table: {'.'.join(table)}"
+ elif identifier[2] and identifier not in target_columns:
+ error = f"Missing column: {'.'.join(identifier)}"
else:
- add_test_error([td.id], "Invalid test: schema, table, or column not defined")
+ continue
+
+ for test_id in test_ids:
+ if test_id not in errors:
+ errors[test_id] = ["Deactivated"]
+ errors[test_id].append(error)
+
+ return errors
+
+
+def run_test_validation(sql_generator: TestExecutionSQL, test_defs: list[TestExecutionDef]) -> list[TestExecutionDef]:
+ quote = sql_generator.flavor_service.quote_character
+
+ identifiers_to_check, target_schemas, collection_errors = collect_test_identifiers(test_defs, quote)
+
+ # Apply collection errors to test defs
+ test_defs_by_id: dict[UUID, TestExecutionDef] = {td.id: td for td in test_defs}
+ for test_id, error_list in collection_errors.items():
+ test_defs_by_id[test_id].errors = error_list
if target_schemas:
LOG.info("Getting tables and columns in target schemas for validation")
@@ -84,12 +133,13 @@ def add_test_error(test_ids: list[UUID], error: str) -> None:
for item in target_identifiers
}
- for identifier, test_ids in identifiers_to_check.items():
- table = (identifier[0], identifier[1])
- if table not in target_tables:
- add_test_error(test_ids, f"Missing table: {'.'.join(table)}")
- elif identifier[2] and identifier not in target_columns:
- add_test_error(test_ids, f"Missing column: {'.'.join(identifier)}")
+ check_errors = check_identifiers(identifiers_to_check, target_tables, target_columns)
+ for test_id, error_list in check_errors.items():
+ if not test_defs_by_id[test_id].errors:
+ test_defs_by_id[test_id].errors = error_list
+ else:
+            # The existing errors list already begins with the "Deactivated" marker, so drop the duplicate leading entry from error_list
+ test_defs_by_id[test_id].errors.extend(error_list[1:] if test_defs_by_id[test_id].errors else error_list)
error_results = sql_generator.get_test_errors(test_defs_by_id.values())
if error_results:
diff --git a/testgen/commands/run_upgrade_db_config.py b/testgen/commands/run_upgrade_db_config.py
index 95ec4bc0..712149ba 100644
--- a/testgen/commands/run_upgrade_db_config.py
+++ b/testgen/commands/run_upgrade_db_config.py
@@ -51,32 +51,36 @@ def _get_upgrade_template_directory():
return "dbupgrade"
-def _get_upgrade_scripts(sub_directory: str, params_mapping: dict, mask: str = r"^.*sql$", min_val: str = "") -> tuple[list[tuple[str, dict]], str]:
+def _get_upgrade_scripts(sub_directory: str, params_mapping: dict, mask: str = r"^.*sql$", min_val: str = "") -> list[tuple[str, str]]:
files = sorted(get_template_files(mask=mask, sub_directory=sub_directory), key=lambda key: str(key))
- max_prefix = ""
- queries = []
+ scripts = []
for file in files:
if file.name > min_val:
template = file.read_text("utf-8")
query = replace_params(template, params_mapping)
- queries.append((query, None))
- max_prefix = file.name[0:4]
+ scripts.append((file.name[0:4], query))
- if len(queries) == 0:
+ if not scripts:
LOG.debug(f"No sql files were found for the mask {mask} in subdirectory {sub_directory}")
- return queries, max_prefix
+ return scripts
-def _execute_upgrade_scripts(params_mapping: dict, lstScripts: list[tuple[str, dict]]):
- # Run scripts using admin credentials
- execute_db_queries(
- lstScripts,
- user_override=params_mapping["TESTGEN_ADMIN_USER"],
- password_override=params_mapping["TESTGEN_ADMIN_PASSWORD"],
- user_type="schema_admin",
- )
+def _execute_upgrade_scripts(params_mapping: dict, scripts: list[tuple[str, str]]) -> bool:
+ admin_user = params_mapping["TESTGEN_ADMIN_USER"]
+ admin_password = params_mapping["TESTGEN_ADMIN_PASSWORD"]
+
+ for revision_prefix, query in scripts:
+ LOG.info(f"Applying upgrade script {revision_prefix}")
+ execute_db_queries(
+ [(query, None)],
+ user_override=admin_user,
+ password_override=admin_password,
+ user_type="schema_admin",
+ )
+ _update_revision_number(params_mapping, revision_prefix)
+
return True
@@ -131,18 +135,17 @@ def run_upgrade_db_config() -> bool:
next_revision = _format_revision_prefix(_get_next_revision_prefix(params_mapping))
upgrade_dir = _get_upgrade_template_directory()
- queries, max_revision = _get_upgrade_scripts(upgrade_dir, params_mapping, min_val=next_revision)
- LOG.info(f"Current revision: {current_revision}. Latest revision: {max_revision or current_revision}. Upgrade scripts: {len(queries)}")
- if len(queries) > 0:
- has_been_upgraded = _execute_upgrade_scripts(params_mapping, queries)
- else:
- has_been_upgraded = False
+ scripts = _get_upgrade_scripts(upgrade_dir, params_mapping, min_val=next_revision)
+ latest_revision = scripts[-1][0] if scripts else current_revision
+ LOG.info(f"Current revision: {current_revision}. Latest revision: {latest_revision}. Upgrade scripts: {len(scripts)}")
+ if scripts:
+ _execute_upgrade_scripts(params_mapping, scripts)
LOG.info("Refreshing static metadata")
_refresh_static_metadata(params_mapping)
+ has_been_upgraded = bool(scripts)
if has_been_upgraded:
- _update_revision_number(params_mapping, max_revision)
LOG.info("Application data was successfully upgraded, and static metadata was refreshed.")
else:
LOG.info("Database upgrade was not required. Static metadata was refreshed.")
@@ -155,6 +158,5 @@ def is_db_revision_up_to_date():
strNextPrefix = _format_revision_prefix(_get_next_revision_prefix(params_mapping))
upgrade_dir = _get_upgrade_template_directory()
- # Retrieve and execute upgrade scripts, if any
- lstQueries, max_prefix = _get_upgrade_scripts(upgrade_dir, params_mapping, min_val=strNextPrefix)
- return len(lstQueries) == 0
+ scripts = _get_upgrade_scripts(upgrade_dir, params_mapping, min_val=strNextPrefix)
+ return len(scripts) == 0
diff --git a/testgen/commands/test_thresholds_prediction.py b/testgen/commands/test_thresholds_prediction.py
index ca7b679b..7f6617ee 100644
--- a/testgen/commands/test_thresholds_prediction.py
+++ b/testgen/commands/test_thresholds_prediction.py
@@ -213,6 +213,11 @@ def compute_freshness_threshold(
if schedule.stage == "active":
excluded_days = frozenset(range(7)) - schedule.active_days if schedule.active_days else None
+ # Once the schedule is active, excluded_days is the single source of truth
+ # for day exclusion — it supersedes exclude_weekends, which was the user's
+ # initial hint before enough data was available for schedule inference.
+ schedule_exclude_weekends = False if excluded_days else exclude_weekends
+
# For sub-daily schedules, apply window exclusion for overnight gaps
has_window = (
schedule.frequency == "sub_daily"
@@ -228,7 +233,7 @@ def compute_freshness_threshold(
upper_percentile=upper_percentile,
floor_multiplier=floor_multiplier,
lower_percentile=lower_percentile,
- exclude_weekends=exclude_weekends,
+ exclude_weekends=schedule_exclude_weekends,
holiday_codes=holiday_codes,
tz=schedule_tz,
staleness_factor=staleness_factor,
@@ -246,7 +251,7 @@ def compute_freshness_threshold(
holiday_dates = resolve_holiday_dates(holiday_codes, history.index) if holiday_codes else None
schedule_upper = minutes_to_next_deadline(
result.last_update, schedule,
- exclude_weekends, holiday_dates, schedule_tz,
+ schedule_exclude_weekends, holiday_dates, schedule_tz,
deadline_buffer, excluded_days=excluded_days,
)
if schedule_upper is not None:
diff --git a/testgen/common/auth.py b/testgen/common/auth.py
new file mode 100644
index 00000000..94c83ed0
--- /dev/null
+++ b/testgen/common/auth.py
@@ -0,0 +1,59 @@
+import base64
+import logging
+from datetime import UTC, datetime, timedelta
+
+import bcrypt
+import jwt
+
+from testgen import settings
+
+LOG = logging.getLogger("testgen")
+
+
+def get_jwt_signing_key() -> bytes:
+ """Decode the base64-encoded JWT signing key from settings."""
+ return base64.b64decode(settings.JWT_HASHING_KEY_B64.encode("ascii"))
+
+
+def create_jwt_token(username: str, expiry_days: int = 30) -> str:
+ """Create a signed JWT token with the standard TestGen payload schema."""
+ payload = {
+ "username": username,
+ "exp_date": (datetime.now(UTC) + timedelta(days=expiry_days)).timestamp(),
+ }
+ return jwt.encode(payload, get_jwt_signing_key(), algorithm="HS256")
+
+
+def decode_jwt_token(token_str: str) -> dict:
+ """Decode and validate a JWT token. Returns the payload dict.
+
+ Raises ValueError if the token is invalid or expired.
+ """
+ try:
+ payload = jwt.decode(token_str, get_jwt_signing_key(), algorithms=["HS256"])
+ except jwt.InvalidTokenError as e:
+ raise ValueError(f"Invalid token: {e}") from e
+
+ if payload.get("exp_date", 0) <= datetime.now(UTC).timestamp():
+ raise ValueError("Token has expired")
+
+ return payload
+
+
+def verify_password(password: str, hashed_password: str) -> bool:
+ """Verify a plaintext password against a bcrypt hash.
+
+ Same algorithm as streamlit_authenticator.
+ """
+ return bcrypt.checkpw(password.encode(), hashed_password.encode())
+
+
+def check_permission(user: object, permission: str) -> bool:
+ """Check if a user has the given permission.
+
+ Uses the RBAC provider registered by installed plugins.
+ Returns True (all allowed) if no plugin overrides the default.
+ """
+ from testgen.utils.plugins import PluginHook
+
+ return PluginHook.instance().rbac.check_permission(user, permission)
diff --git a/testgen/common/database/database_service.py b/testgen/common/database/database_service.py
index 4b340b18..0e338318 100644
--- a/testgen/common/database/database_service.py
+++ b/testgen/common/database/database_service.py
@@ -26,7 +26,12 @@
get_tg_username,
)
from testgen.common.database import FilteredStringIO
-from testgen.common.database.flavor.flavor_service import ConnectionParams, FlavorService, SQLFlavor
+from testgen.common.database.flavor.flavor_service import (
+ ConnectionParams,
+ FlavorService,
+ SQLFlavor,
+ resolve_connection_params,
+)
from testgen.common.read_file import get_template_files
from testgen.utils import get_exception_message
@@ -407,27 +412,22 @@ def _init_target_db_connection() -> Connection:
raise ValueError("Target database connection parameters were not set")
flavor_service = get_flavor_service(target_db_params["sql_flavor"])
- flavor_service.init(target_db_params)
+ params = resolve_connection_params(target_db_params)
engine = engine_cache.target_db
if not engine:
try:
- engine: Engine = create_engine(
- flavor_service.get_connection_string(),
- connect_args=flavor_service.get_connect_args(),
- **flavor_service.get_engine_args(),
- )
+ engine: Engine = flavor_service.create_engine(target_db_params)
except SQLAlchemyError as e:
- raise ValueError(f"Failed to create engine for Target database '{flavor_service.dbname}' (User type = normal)") from e
+ raise ValueError(f"Failed to create engine for Target database '{params.dbname}' (User type = normal)") from e
else:
engine_cache.target_db = engine
-
connection: Connection = engine.connect()
- for query, params in flavor_service.get_pre_connection_queries():
+ for query, query_params in flavor_service.get_pre_connection_queries(params):
try:
- connection.execute(text(query), params)
+ connection.execute(text(query), query_params)
except Exception:
LOG.warning(
f"Failed to execute preconnection query on Target database: {query}",
diff --git a/testgen/common/database/flavor/bigquery_flavor_service.py b/testgen/common/database/flavor/bigquery_flavor_service.py
index 8e80f146..47150a73 100644
--- a/testgen/common/database/flavor/bigquery_flavor_service.py
+++ b/testgen/common/database/flavor/bigquery_flavor_service.py
@@ -1,6 +1,6 @@
from typing import Any
-from testgen.common.database.flavor.flavor_service import FlavorService
+from testgen.common.database.flavor.flavor_service import FlavorService, ResolvedConnectionParams
class BigqueryFlavorService(FlavorService):
@@ -8,15 +8,16 @@ class BigqueryFlavorService(FlavorService):
quote_character = "`"
escaped_single_quote = "\\'"
varchar_type = "STRING"
+ url_scheme = "bigquery"
- def get_connection_string_head(self):
- return "bigquery://"
+ def get_connection_string_head(self, params: ResolvedConnectionParams) -> str: # noqa: ARG002
+ return f"{self.url_scheme}://"
- def get_connection_string_from_fields(self):
- return f"bigquery://{self.service_account_key["project_id"] if self.service_account_key else ""}"
+ def get_connection_string_from_fields(self, params: ResolvedConnectionParams) -> str:
+ return f"{self.url_scheme}://{params.service_account_key["project_id"] if params.service_account_key else ""}"
- def get_connect_args(self) -> dict:
+ def get_connect_args(self, params: ResolvedConnectionParams) -> dict: # noqa: ARG002
return {}
- def get_engine_args(self) -> dict[str,Any]:
- return {"credentials_info": self.service_account_key} if self.service_account_key else {}
+ def get_engine_args(self, params: ResolvedConnectionParams) -> dict[str, Any]:
+ return {"credentials_info": params.service_account_key} if params.service_account_key else {}
diff --git a/testgen/common/database/flavor/databricks_flavor_service.py b/testgen/common/database/flavor/databricks_flavor_service.py
index b9b339ef..8b143b92 100644
--- a/testgen/common/database/flavor/databricks_flavor_service.py
+++ b/testgen/common/database/flavor/databricks_flavor_service.py
@@ -1,6 +1,6 @@
from urllib.parse import quote_plus
-from testgen.common.database.flavor.flavor_service import FlavorService
+from testgen.common.database.flavor.flavor_service import FlavorService, ResolvedConnectionParams
class DatabricksFlavorService(FlavorService):
@@ -8,12 +8,58 @@ class DatabricksFlavorService(FlavorService):
quote_character = "`"
escaped_single_quote = "\\'"
varchar_type = "STRING"
+ url_scheme = "databricks"
- def get_connection_string_head(self):
- return f"{self.flavor}://{self.username}:{quote_plus(self.password)}@"
+ def get_pre_connection_queries(self, params: ResolvedConnectionParams) -> list[tuple[str, dict | None]]:
+ if params.dbname:
+ return [(f"USE CATALOG `{params.dbname}`", None)]
+ return []
- def get_connection_string_from_fields(self):
+ def get_connect_args(self, params: ResolvedConnectionParams) -> dict:
+ args = {}
+ if params.dbname:
+ args["catalog"] = params.dbname
+ if params.connect_by_key:
+ args["credentials_provider"] = self._get_oauth_credentials_provider(params)
+ return args
+
+ def get_connection_string_head(self, params: ResolvedConnectionParams) -> str:
+ if params.connect_by_key:
+ return f"{self.url_scheme}://oauth:@"
+ return f"{self.url_scheme}://token:{quote_plus(params.password)}@"
+
+ def get_connection_string_from_fields(self, params: ResolvedConnectionParams) -> str:
+ if params.connect_by_key:
+ return (
+ f"{self.url_scheme}://oauth:@{params.host}:{params.port}/{params.dbname}"
+ f"?http_path={params.http_path}&catalog={params.dbname}"
+ )
return (
- f"{self.flavor}://{self.username}:{quote_plus(self.password)}@{self.host}:{self.port}/{self.dbname}"
- f"?http_path={self.http_path}"
+ f"{self.url_scheme}://token:{quote_plus(params.password)}@{params.host}:{params.port}/{params.dbname}"
+ f"?http_path={params.http_path}&catalog={params.dbname}"
+ )
+
+ def _get_oauth_credentials_provider(self, params: ResolvedConnectionParams):
+ from databricks.sdk.core import Config, oauth_service_principal
+
+ config = Config(
+ host=f"https://{params.host}",
+ client_id=params.username,
+ client_secret=params.password,
)
+ # oauth_service_principal(config) returns an OAuthCredentialsProvider,
+ # which is callable: provider() -> Dict[str, str] (auth headers).
+ #
+ # The SQL connector's ExternalAuthProvider expects a CredentialsProvider
+ # with two levels: credentials_provider() -> HeaderFactory, then
+ # HeaderFactory() -> Dict[str, str]. Wrap to bridge the interface.
+ oauth_provider = oauth_service_principal(config)
+
+ class _CredentialsProvider:
+ def auth_type(self):
+ return "oauth"
+
+ def __call__(self):
+ return oauth_provider
+
+ return _CredentialsProvider()
diff --git a/testgen/common/database/flavor/flavor_service.py b/testgen/common/database/flavor/flavor_service.py
index bb253595..a56ac8ba 100644
--- a/testgen/common/database/flavor/flavor_service.py
+++ b/testgen/common/database/flavor/flavor_service.py
@@ -1,10 +1,15 @@
from abc import abstractmethod
+from dataclasses import dataclass
from typing import Any, Literal, TypedDict
-from urllib.parse import parse_qs, urlparse
+from urllib.parse import quote_plus
+
+from sqlalchemy import create_engine as sqlalchemy_create_engine
+from sqlalchemy.engine.base import Engine
from testgen.common.encrypt import DecryptText
-SQLFlavor = Literal["redshift", "redshift_spectrum", "snowflake", "mssql", "postgresql", "databricks"]
+SQLFlavor = Literal["redshift", "redshift_spectrum", "snowflake", "mssql", "postgresql", "databricks", "bigquery", "oracle", "sap_hana"]
+RowLimitingClause = Literal["limit", "top", "fetch"]
class ConnectionParams(TypedDict):
@@ -25,6 +30,59 @@ class ConnectionParams(TypedDict):
connect_with_identity: bool
sql_flavor_code: str
+
+@dataclass(frozen=True, slots=True)
+class ResolvedConnectionParams:
+ url: str = ""
+ connect_by_url: bool = False
+ username: str = ""
+ password: str | None = None
+ host: str = ""
+ port: str = ""
+ dbname: str = ""
+ dbschema: str | None = None
+ sql_flavor: str = ""
+ sql_flavor_code: str = ""
+ connect_by_key: bool = False
+ private_key: str | None = None
+ private_key_passphrase: str | None = None
+ http_path: str = ""
+ catalog: str = ""
+ warehouse: str = ""
+ service_account_key: dict[str, Any] | None = None
+ connect_with_identity: bool = False
+
+
+def _decrypt_if_needed(value: Any) -> str | None:
+ if isinstance(value, memoryview | bytes):
+ return DecryptText(value)
+ return value
+
+
+def resolve_connection_params(connection_params: ConnectionParams) -> ResolvedConnectionParams:
+ sql_flavor = connection_params.get("sql_flavor") or ""
+ return ResolvedConnectionParams(
+ url=connection_params.get("url") or "",
+ connect_by_url=connection_params.get("connect_by_url", False),
+ username=connection_params.get("project_user") or "",
+ password=_decrypt_if_needed(connection_params.get("project_pw_encrypted")),
+ host=connection_params.get("project_host") or "",
+ port=connection_params.get("project_port") or "",
+ dbname=connection_params.get("project_db") or "",
+ dbschema=connection_params.get("table_group_schema"),
+ sql_flavor=sql_flavor,
+ sql_flavor_code=connection_params.get("sql_flavor_code") or sql_flavor,
+ connect_by_key=connection_params.get("connect_by_key", False),
+ private_key=_decrypt_if_needed(connection_params.get("private_key")),
+ private_key_passphrase=_decrypt_if_needed(connection_params.get("private_key_passphrase")),
+ http_path=connection_params.get("http_path") or "",
+ catalog=connection_params.get("catalog") or "",
+ warehouse=connection_params.get("warehouse") or "",
+ service_account_key=connection_params.get("service_account_key"),
+ connect_with_identity=connection_params.get("connect_with_identity") or False,
+ )
+
+
class FlavorService:
concat_operator = "||"
@@ -34,104 +92,39 @@ class FlavorService:
escape_clause = ""
varchar_type = "VARCHAR(1000)"
ddf_table_ref = "table_name"
- use_top = False
+ row_limiting_clause: RowLimitingClause = "limit"
default_uppercase = False
+ test_query = "SELECT 1"
+ url_scheme = "postgresql"
- def init(self, connection_params: ConnectionParams):
- self.url = connection_params.get("url") or ""
- self.connect_by_url = connection_params.get("connect_by_url", False)
- self.username = connection_params.get("project_user") or ""
- self.host = connection_params.get("project_host") or ""
- self.port = connection_params.get("project_port") or ""
- self.dbname = connection_params.get("project_db") or ""
- self.flavor = connection_params.get("sql_flavor")
- self.dbschema = connection_params.get("table_group_schema", None)
- self.connect_by_key = connection_params.get("connect_by_key", False)
- self.http_path = connection_params.get("http_path") or ""
- self.catalog = connection_params.get("catalog") or ""
- self.warehouse = connection_params.get("warehouse") or ""
- self.service_account_key = connection_params.get("service_account_key", None)
- self.connect_with_identity = connection_params.get("connect_with_identity") or False
- self.sql_flavor_code = connection_params.get("sql_flavor_code") or self.flavor
-
- password = connection_params.get("project_pw_encrypted", None)
- if isinstance(password, memoryview) or isinstance(password, bytes):
- password = DecryptText(password)
- self.password = password
-
- private_key = connection_params.get("private_key", None)
- if isinstance(private_key, memoryview) or isinstance(private_key, bytes):
- private_key = DecryptText(private_key)
- self.private_key = private_key
-
- private_key_passphrase = connection_params.get("private_key_passphrase", None)
- if isinstance(private_key_passphrase, memoryview) or isinstance(private_key_passphrase, bytes):
- private_key_passphrase = DecryptText(private_key_passphrase)
- self.private_key_passphrase = private_key_passphrase
-
- def get_pre_connection_queries(self) -> list[tuple[str, dict | None]]:
+ def get_pre_connection_queries(self, params: ResolvedConnectionParams) -> list[tuple[str, dict | None]]: # noqa: ARG002
return []
- def get_connect_args(self) -> dict:
+ def get_connect_args(self, params: ResolvedConnectionParams) -> dict: # noqa: ARG002
return {"connect_timeout": 3600}
- def get_engine_args(self) -> dict[str,Any]:
+ def get_engine_args(self, params: ResolvedConnectionParams) -> dict[str, Any]: # noqa: ARG002
return {}
- def get_connection_string(self) -> str:
- if self.connect_by_url:
- header = self.get_connection_string_head()
- url = header + self.url
- return url
+ def create_engine(self, connection_params: ConnectionParams) -> Engine:
+ params = resolve_connection_params(connection_params)
+ return sqlalchemy_create_engine(
+ self.get_connection_string(params),
+ connect_args=self.get_connect_args(params),
+ **self.get_engine_args(params),
+ )
+
+ def get_connection_string(self, params: ResolvedConnectionParams) -> str:
+ if params.connect_by_url:
+ header = self.get_connection_string_head(params)
+ return header + params.url
else:
- return self.get_connection_string_from_fields()
+ return self.get_connection_string_from_fields(params)
@abstractmethod
- def get_connection_string_from_fields(self) -> str:
+ def get_connection_string_from_fields(self, params: ResolvedConnectionParams) -> str:
raise NotImplementedError("Subclasses must implement this method")
- @abstractmethod
- def get_connection_string_head(self) -> str:
- raise NotImplementedError("Subclasses must implement this method")
+ def get_connection_string_head(self, params: ResolvedConnectionParams) -> str:
+        return f"{self.url_scheme}://{params.username}:{quote_plus(params.password or '')}@"
- def get_parts_from_connection_string(self) -> dict[str, Any]:
- if self.connect_by_url:
- if not self.url:
- return {}
-
- parsed_url = urlparse(self.get_connection_string())
- credentials, location = (
- parsed_url.netloc if "@" in parsed_url.netloc else f"@{parsed_url.netloc}"
- ).split("@")
- username, password = (
- credentials if ":" in credentials else f"{credentials}:"
- ).split(":")
- host, port = (
- location if ":" in location else f"{location}:"
- ).split(":")
-
- database = (path_patrs[0] if (path_patrs := parsed_url.path.strip("/").split("/")) else "")
-
- extras = {
- param_name: param_values[0]
- for param_name, param_values in parse_qs(parsed_url.query or "").items()
- }
-
- return {
- "username": username,
- "password": password,
- "host": host,
- "port": port,
- "dbname": database,
- **extras,
- }
-
- return {
- "username": self.username,
- "password": self.password,
- "host": self.host,
- "port": self.port,
- "dbname": self.dbname,
- "http_path": self.http_path,
- "catalog": self.catalog,
- }
diff --git a/testgen/common/database/flavor/mssql_flavor_service.py b/testgen/common/database/flavor/mssql_flavor_service.py
index 088c11e9..70ee3d11 100644
--- a/testgen/common/database/flavor/mssql_flavor_service.py
+++ b/testgen/common/database/flavor/mssql_flavor_service.py
@@ -3,50 +3,48 @@
from sqlalchemy.engine import URL
from testgen import settings
-from testgen.common.database.flavor.flavor_service import FlavorService
+from testgen.common.database.flavor.flavor_service import FlavorService, ResolvedConnectionParams
class MssqlFlavorService(FlavorService):
concat_operator = "+"
escaped_underscore = "[_]"
- use_top = True
+ row_limiting_clause = "top"
+ url_scheme = "mssql+pyodbc"
- def get_connection_string_head(self):
- return f"mssql+pyodbc://{self.username}:{quote_plus(self.password)}@"
-
- def get_connection_string_from_fields(self):
+ def get_connection_string_from_fields(self, params: ResolvedConnectionParams) -> str:
connection_url = URL.create(
- "mssql+pyodbc",
- username=self.username,
- password=quote_plus(self.password or ""),
- host=self.host,
- port=int(self.port or 1443),
- database=self.dbname,
+ self.url_scheme,
+ username=params.username,
+ password=quote_plus(params.password or ""),
+ host=params.host,
+            port=int(params.port or 1443),  # FIXME(review): SQL Server default port is 1433, not 1443 — typo carried over from the old code
+ database=params.dbname,
query={
"driver": "ODBC Driver 18 for SQL Server",
},
)
- if self.connect_with_identity:
+ if params.connect_with_identity:
connection_url = connection_url._replace(username=None, password=None).update_query_dict({
"encrypt": "yes",
"authentication": "ActiveDirectoryMsi",
})
- if self.sql_flavor_code == "synapse_mssql":
+ if params.sql_flavor_code == "synapse_mssql":
connection_url = connection_url.update_query_dict({"autocommit": "True"})
return connection_url.render_as_string(hide_password=False)
- def get_pre_connection_queries(self):
+ def get_pre_connection_queries(self, params: ResolvedConnectionParams) -> list[tuple[str, dict | None]]: # noqa: ARG002
return [
("SET ANSI_DEFAULTS ON;", None),
("SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;", None),
]
-
- def get_connect_args(self):
- connect_args = super().get_connect_args()
+
+ def get_connect_args(self, params: ResolvedConnectionParams) -> dict:
+ connect_args = super().get_connect_args(params)
if settings.SKIP_DATABASE_CERTIFICATE_VERIFICATION:
connect_args["TrustServerCertificate"] = "yes"
return connect_args
diff --git a/testgen/common/database/flavor/oracle_flavor_service.py b/testgen/common/database/flavor/oracle_flavor_service.py
new file mode 100644
index 00000000..3efd3854
--- /dev/null
+++ b/testgen/common/database/flavor/oracle_flavor_service.py
@@ -0,0 +1,32 @@
+import sys
+from urllib.parse import quote_plus
+
+import oracledb
+
+from testgen.common.database.flavor.flavor_service import FlavorService, ResolvedConnectionParams
+
+# https://stackoverflow.com/a/74105559
+oracledb.version = "8.3.0"
+sys.modules["cx_Oracle"] = oracledb
+
+
+class OracleFlavorService(FlavorService):
+
+ escaped_underscore = "\\_"
+ escape_clause = "ESCAPE '\\'"
+ varchar_type = "VARCHAR2(1000)"
+ default_uppercase = True
+ row_limiting_clause = "fetch"
+ test_query = "SELECT 1 FROM DUAL"
+ url_scheme = "oracle"
+
+ def get_connection_string_from_fields(self, params: ResolvedConnectionParams) -> str:
+ return f"{self.url_scheme}://{params.username}:{quote_plus(params.password)}@{params.host}:{params.port}?service_name={params.dbname}"
+
+ def get_pre_connection_queries(self, params: ResolvedConnectionParams) -> list[tuple[str, dict | None]]: # noqa: ARG002
+ return [
+ ("ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD HH24:MI:SS'", None),
+ ]
+
+ def get_connect_args(self, params: ResolvedConnectionParams) -> dict: # noqa: ARG002
+ return {}
diff --git a/testgen/common/database/flavor/redshift_flavor_service.py b/testgen/common/database/flavor/redshift_flavor_service.py
index 36f89418..3b6c6e6a 100644
--- a/testgen/common/database/flavor/redshift_flavor_service.py
+++ b/testgen/common/database/flavor/redshift_flavor_service.py
@@ -1,20 +1,13 @@
from urllib.parse import quote_plus
-from testgen.common.database.flavor.flavor_service import FlavorService
+from testgen.common.database.flavor.flavor_service import FlavorService, ResolvedConnectionParams
class RedshiftFlavorService(FlavorService):
escaped_underscore = "\\\\_"
+ url_scheme = "postgresql"
- def init(self, connection_params: dict):
- super().init(connection_params)
- # This is for connection purposes. sqlalchemy 1.4.46 uses postgresql to connect to redshift database
- self.flavor = "postgresql"
-
- def get_connection_string_head(self):
- return f"{self.flavor}://{self.username}:{quote_plus(self.password)}@"
-
- def get_connection_string_from_fields(self):
+ def get_connection_string_from_fields(self, params: ResolvedConnectionParams) -> str:
# STANDARD FORMAT: strConnect = 'flavor://username:password@host:port/database'
- return f"{self.flavor}://{self.username}:{quote_plus(self.password)}@{self.host}:{self.port}/{self.dbname}"
+ return f"{self.url_scheme}://{params.username}:{quote_plus(params.password)}@{params.host}:{params.port}/{params.dbname}"
diff --git a/testgen/common/database/flavor/sap_hana_flavor_service.py b/testgen/common/database/flavor/sap_hana_flavor_service.py
new file mode 100644
index 00000000..f6b6da17
--- /dev/null
+++ b/testgen/common/database/flavor/sap_hana_flavor_service.py
@@ -0,0 +1,20 @@
+from urllib.parse import quote_plus
+
+from testgen.common.database.flavor.flavor_service import FlavorService, ResolvedConnectionParams
+
+
+class SapHanaFlavorService(FlavorService):
+
+ varchar_type = "NVARCHAR(1000)"
+ default_uppercase = True
+ test_query = "SELECT 1 FROM DUMMY"
+ url_scheme = "hana+hdbcli"
+
+ def get_connection_string_from_fields(self, params: ResolvedConnectionParams) -> str:
+ url = f"{self.url_scheme}://{params.username}:{quote_plus(params.password)}@{params.host}:{params.port}/"
+ if params.dbname:
+ url += f"?databaseName={params.dbname}"
+ return url
+
+ def get_connect_args(self, params: ResolvedConnectionParams) -> dict: # noqa: ARG002
+ return {}
diff --git a/testgen/common/database/flavor/snowflake_flavor_service.py b/testgen/common/database/flavor/snowflake_flavor_service.py
index a7bad3d8..5627b6ec 100644
--- a/testgen/common/database/flavor/snowflake_flavor_service.py
+++ b/testgen/common/database/flavor/snowflake_flavor_service.py
@@ -4,7 +4,7 @@
from cryptography.hazmat.primitives import serialization
from snowflake.sqlalchemy import URL
-from testgen.common.database.flavor.flavor_service import FlavorService
+from testgen.common.database.flavor.flavor_service import FlavorService, ResolvedConnectionParams
class SnowflakeFlavorService(FlavorService):
@@ -12,13 +12,14 @@ class SnowflakeFlavorService(FlavorService):
escaped_underscore = "\\\\_"
escape_clause = "ESCAPE '\\\\'"
default_uppercase = True
+ url_scheme = "snowflake"
- def get_connect_args(self):
- if self.connect_by_key:
+ def get_connect_args(self, params: ResolvedConnectionParams) -> dict:
+ if params.connect_by_key:
# https://docs.snowflake.com/en/developer-guide/python-connector/sqlalchemy#key-pair-authentication-support
- private_key_passphrase = self.private_key_passphrase.encode() if self.private_key_passphrase else None
+ private_key_passphrase = params.private_key_passphrase.encode() if params.private_key_passphrase else None
private_key = serialization.load_pem_private_key(
- self.private_key.encode(),
+ params.private_key.encode(),
password=private_key_passphrase,
backend=default_backend(),
)
@@ -32,40 +33,40 @@ def get_connect_args(self):
return {"private_key": private_key_bytes}
return {}
- def get_connection_string_head(self):
- if self.connect_by_key:
- return f"snowflake://{self.username}@"
+ def get_connection_string_head(self, params: ResolvedConnectionParams) -> str:
+ if params.connect_by_key:
+ return f"{self.url_scheme}://{params.username}@"
else:
- return f"snowflake://{self.username}:{quote_plus(self.password)}@"
+ return f"{self.url_scheme}://{params.username}:{quote_plus(params.password)}@"
- def get_connection_string_from_fields(self):
+ def get_connection_string_from_fields(self, params: ResolvedConnectionParams) -> str:
# SNOWFLAKE FORMAT: strConnect = 'flavor://username:password@host/database'
# optionally + '/[schema]' + '?warehouse=xxx'
# NOTE: Snowflake host should NOT include ".snowflakecomputing.com"
- account, _ = self.host.split(".", maxsplit=1) if "." in self.host else ("", "")
- host = self.host
+ account, _ = params.host.split(".", maxsplit=1) if "." in params.host else ("", "")
+ host = params.host
if ".snowflakecomputing.com" not in host:
host = f"{host}.snowflakecomputing.com"
extra_params = {}
- if self.warehouse:
- extra_params["warehouse"] = self.warehouse
+ if params.warehouse:
+ extra_params["warehouse"] = params.warehouse
connection_url = URL(
host=host,
- port=int(self.port if str(self.port).isdigit() else 443),
+ port=int(params.port if str(params.port).isdigit() else 443),
account=account,
- user=self.username,
- password="" if self.connect_by_key else self.password,
- database=self.dbname,
- schema=self.dbschema or "",
+ user=params.username,
+ password="" if params.connect_by_key else params.password,
+ database=params.dbname,
+ schema=params.dbschema or "",
**extra_params,
)
return connection_url
- def get_pre_connection_queries(self):
+ def get_pre_connection_queries(self, params: ResolvedConnectionParams) -> list[tuple[str, dict | None]]: # noqa: ARG002
return [
("ALTER SESSION SET MULTI_STATEMENT_COUNT = 0;", None),
("ALTER SESSION SET WEEK_START = 7;", None),
diff --git a/testgen/common/database/flavor/trino_flavor_service.py b/testgen/common/database/flavor/trino_flavor_service.py
index ce1133cc..d7a78339 100644
--- a/testgen/common/database/flavor/trino_flavor_service.py
+++ b/testgen/common/database/flavor/trino_flavor_service.py
@@ -1,17 +1,16 @@
from urllib.parse import quote_plus
-from testgen.common.database.flavor.flavor_service import FlavorService
+from testgen.common.database.flavor.flavor_service import FlavorService, ResolvedConnectionParams
class TrinoFlavorService(FlavorService):
- def get_connection_string_head(self):
- return f"{self.flavor}://{self.username}:{quote_plus(self.password)}@"
+ url_scheme = "trino"
- def get_connection_string_from_fields(self):
+ def get_connection_string_from_fields(self, params: ResolvedConnectionParams) -> str:
# STANDARD FORMAT: strConnect = 'flavor://username:password@host:port/catalog'
- return f"{self.flavor}://{self.username}:{quote_plus(self.password)}@{self.host}:{self.port}/{self.catalog}"
+ return f"{self.url_scheme}://{params.username}:{quote_plus(params.password)}@{params.host}:{params.port}/{params.catalog}"
- def get_pre_connection_queries(self):
+ def get_pre_connection_queries(self, params: ResolvedConnectionParams) -> list[tuple[str, dict | None]]:
return [
- (f"USE {self.catalog}.{self.dbschema}", None),
+ (f"USE {params.catalog}.{params.dbschema}", None),
]
diff --git a/testgen/common/models/__init__.py b/testgen/common/models/__init__.py
index 3734b11b..6e2b581c 100644
--- a/testgen/common/models/__init__.py
+++ b/testgen/common/models/__init__.py
@@ -1,3 +1,4 @@
+import contextlib
import functools
import platform
import threading
@@ -32,26 +33,41 @@
_current_session_wrapper.value = None
-def with_database_session(func):
- """
- Set up a thread-global SQLAlchemy session to be accessed
- calling `get_current_session()` from any place.
+@contextlib.contextmanager
+def database_session():
+ """Provide a thread-local SQLAlchemy session.
- NOTE: Call once on the main entry point.
- """
+ Nested: yields existing session, no lifecycle management.
+ Owning: commits on clean exit, rolls back on Exception.
- @functools.wraps(func)
- def wrapper(*args, **kwargs):
+ Uses ``except Exception`` (not ``BaseException``) so that Streamlit's
+ ``RerunException`` (a ``BaseException`` subclass) bypasses both rollback
+ and auto-commit. If ``safe_rerun()`` was called, it already committed.
+ """
+ existing = get_current_session()
+ if existing:
+ yield existing
+ return
+ with Session() as session:
+ _current_session_wrapper.value = session
try:
- session = get_current_session()
- if session:
- return func(*args, **kwargs)
-
- with Session() as session:
- _current_session_wrapper.value = session
- return func(*args, **kwargs)
+ yield session
+ except Exception:
+ session.rollback()
+ raise
+ else:
+ session.commit()
finally:
_current_session_wrapper.value = None
+
+
+def with_database_session(func):
+ """Decorator form of :func:`database_session`."""
+
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ with database_session():
+ return func(*args, **kwargs)
return wrapper
diff --git a/testgen/common/models/connection.py b/testgen/common/models/connection.py
index 1b5a96f5..97a5b83b 100644
--- a/testgen/common/models/connection.py
+++ b/testgen/common/models/connection.py
@@ -1,6 +1,7 @@
from collections.abc import Iterable
from dataclasses import dataclass
from typing import Literal, Self
+from urllib.parse import parse_qs, urlparse
from uuid import UUID, uuid4
import streamlit as st
@@ -19,7 +20,6 @@
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import InstrumentedAttribute
-from testgen.common.database.database_service import get_flavor_service
from testgen.common.database.flavor.flavor_service import SQLFlavor
from testgen.common.models import get_current_session
from testgen.common.models.custom_types import JSON_TYPE, EncryptedBytea, EncryptedJson
@@ -27,7 +27,7 @@
from testgen.common.models.table_group import TableGroup
from testgen.utils import is_uuid4
-SQLFlavorCode = Literal["redshift", "redshift_spectrum", "snowflake", "mssql", "azure_mssql", "synapse_mssql", "postgresql", "databricks"]
+SQLFlavorCode = Literal["redshift", "redshift_spectrum", "snowflake", "mssql", "azure_mssql", "synapse_mssql", "postgresql", "databricks", "bigquery", "oracle", "sap_hana"]
@dataclass
@@ -119,15 +119,26 @@ def clear_cache(cls) -> bool:
def save(self) -> None:
if self.connect_by_url and self.url:
- flavor_service = get_flavor_service(self.sql_flavor)
- flavor_service.init(self.to_dict())
-
- connection_parts = flavor_service.get_parts_from_connection_string()
- if connection_parts:
- self.project_host = connection_parts["host"]
- self.project_port = connection_parts["port"]
- self.project_db = connection_parts["dbname"]
- self.http_path = connection_parts.get("http_path") or None
- self.warehouse = connection_parts.get("warehouse") or None
+ # When connect_by_url=True, the URL is the source of truth.
+ # Normalize it (strip scheme/credentials) and sync host/port/db fields from it.
+ url = self.url
+ if "://" in url:
+ url = url.split("://", 1)[1]
+ if "@" in url:
+ url = url.rsplit("@", 1)[1]
+ self.url = url
+
+ parsed = urlparse(f"scheme://_@{url}")
+ location = parsed.netloc.split("@")[-1]
+ if ":" in location:
+ host, port = location.rsplit(":", 1)
+ else:
+ host, port = location, ""
+ self.project_host = host
+ self.project_port = port
+ self.project_db = parsed.path.strip("/").split("/")[0] if parsed.path.strip("/") else ""
+ extras = {k: v[0] for k, v in parse_qs(parsed.query).items()}
+ self.http_path = extras.get("http_path") or None
+ self.warehouse = extras.get("warehouse") or None
super().save()
diff --git a/testgen/common/models/data_column.py b/testgen/common/models/data_column.py
new file mode 100644
index 00000000..7b344a14
--- /dev/null
+++ b/testgen/common/models/data_column.py
@@ -0,0 +1,31 @@
+from uuid import UUID, uuid4
+
+from sqlalchemy import Boolean, Column, ForeignKey, String
+from sqlalchemy.dialects import postgresql
+
+from testgen.common.models.entity import Entity
+
+
+class DataColumnChars(Entity):
+ __tablename__ = "data_column_chars"
+
+ id: UUID = Column("column_id", postgresql.UUID(as_uuid=True), primary_key=True, default=uuid4)
+ table_groups_id: UUID = Column(postgresql.UUID(as_uuid=True), ForeignKey("table_groups.id"))
+ schema_name: str = Column(String)
+ table_name: str = Column(String)
+ column_name: str = Column(String)
+ excluded_data_element: bool | None = Column(Boolean, nullable=True)
+ pii_flag: str | None = Column(String(50), nullable=True)
+
+ _default_order_by = (id,)
+
+ # Unmapped columns: table_id, ordinal_position, general_type, column_type,
+ # db_data_type, functional_data_type, description, critical_data_element,
+ # data_source, source_system, source_process, business_domain,
+ # stakeholder_group, transform_level, aggregation_level, data_product,
+ # add_date, last_mod_date, drop_date, test_ct, last_test_date,
+ # tests_last_run, tests_7_days_prior, tests_30_days_prior,
+ # fails_last_run, fails_7_days_prior, fails_30_days_prior,
+ # warnings_last_run, warnings_7_days_prior, warnings_30_days_prior,
+ # last_complete_profile_run_id, valid_profile_issue_ct,
+ # valid_test_issue_ct, dq_score_profiling, dq_score_testing
diff --git a/testgen/common/models/data_table.py b/testgen/common/models/data_table.py
new file mode 100644
index 00000000..4cfa814d
--- /dev/null
+++ b/testgen/common/models/data_table.py
@@ -0,0 +1,45 @@
+from uuid import UUID, uuid4
+
+from sqlalchemy import BigInteger, Column, ForeignKey, String, asc, func, select
+from sqlalchemy.dialects import postgresql
+
+from testgen.common.models import get_current_session
+from testgen.common.models.entity import Entity
+from testgen.common.models.table_group import TableGroup
+
+
+class DataTable(Entity):
+ __tablename__ = "data_table_chars"
+
+ id: UUID = Column("table_id", postgresql.UUID(as_uuid=True), primary_key=True, default=uuid4)
+ table_groups_id: UUID = Column(postgresql.UUID(as_uuid=True), ForeignKey("table_groups.id"))
+ table_name: str = Column(String)
+ column_ct: int = Column(BigInteger)
+
+ # Unmapped columns: schema_name, functional_table_type, description,
+ # critical_data_element, data_source, source_system, source_process,
+ # business_domain, stakeholder_group, transform_level, aggregation_level,
+ # data_product, add_date, drop_date, last_refresh_date, approx_record_ct,
+ # record_ct, last_complete_profile_run_id, last_profile_record_ct,
+ # dq_score_profiling, dq_score_testing
+
+ @classmethod
+ def select_table_names(
+ cls, table_groups_id: UUID, project_codes: list[str] | None = None, limit: int = 100, offset: int = 0,
+ ) -> list[str]:
+ query = select(cls.table_name).where(cls.table_groups_id == table_groups_id)
+ if project_codes is not None:
+ query = query.join(TableGroup, cls.table_groups_id == TableGroup.id).where(
+ TableGroup.project_code.in_(project_codes)
+ )
+ query = query.order_by(asc(func.lower(cls.table_name))).offset(offset).limit(limit)
+ return list(get_current_session().scalars(query).all())
+
+ @classmethod
+ def count_tables(cls, table_groups_id: UUID, project_codes: list[str] | None = None) -> int:
+ query = select(func.count()).select_from(cls).where(cls.table_groups_id == table_groups_id)
+ if project_codes is not None:
+ query = query.join(TableGroup, cls.table_groups_id == TableGroup.id).where(
+ TableGroup.project_code.in_(project_codes)
+ )
+ return get_current_session().scalar(query) or 0
diff --git a/testgen/common/models/entity.py b/testgen/common/models/entity.py
index 6d0b0950..3d7560de 100644
--- a/testgen/common/models/entity.py
+++ b/testgen/common/models/entity.py
@@ -118,9 +118,6 @@ def delete_where(cls, *clauses) -> None:
query = delete(cls).where(*clauses)
db_session = get_current_session()
db_session.execute(query)
- db_session.commit()
- # We clear all because cached data like Project.select_summary will be affected
- st.cache_data.clear()
@classmethod
def is_in_use(cls, ids: list[str]) -> bool:
@@ -144,24 +141,15 @@ def refresh(self) -> None:
db_session.refresh(self)
def save(self) -> None:
- is_new = self.id is None
db_session = get_current_session()
db_session.add(self)
db_session.flush([self])
- db_session.commit()
db_session.refresh(self, ["id"])
- if is_new:
- # We clear all because cached data like Project.select_summary will be affected
- st.cache_data.clear()
- else:
- self.__class__.clear_cache()
def delete(self) -> None:
db_session = get_current_session()
db_session.add(self)
db_session.delete(self)
- db_session.commit()
- self.__class__.clear_cache()
def to_dict(self, json_safe: bool = False):
result = {col.name: getattr(self, col.name) for col in self.__table__.columns}
diff --git a/testgen/common/models/profiling_run.py b/testgen/common/models/profiling_run.py
index b7059a6d..41ae7e16 100644
--- a/testgen/common/models/profiling_run.py
+++ b/testgen/common/models/profiling_run.py
@@ -246,8 +246,7 @@ def cancel_all_running(cls) -> list[UUID]:
)
db_session = get_current_session()
rows = db_session.execute(query)
- db_session.commit()
- cls.clear_cache()
+ db_session.flush()
return [r.id for r in rows]
@classmethod
@@ -255,8 +254,6 @@ def cancel_run(cls, run_id: str | UUID) -> None:
query = update(cls).where(cls.id == run_id).values(status="Cancelled", profiling_endtime=datetime.now(UTC))
db_session = get_current_session()
db_session.execute(query)
- db_session.commit()
- cls.clear_cache()
@classmethod
def cascade_delete(cls, ids: list[str]) -> None:
@@ -272,7 +269,6 @@ def cascade_delete(cls, ids: list[str]) -> None:
"""
db_session = get_current_session()
db_session.execute(text(query), {"profiling_run_ids": tuple(ids)})
- db_session.commit()
cls.delete_where(cls.id.in_(ids))
@classmethod
diff --git a/testgen/common/models/project.py b/testgen/common/models/project.py
index e39daecd..eedfb13f 100644
--- a/testgen/common/models/project.py
+++ b/testgen/common/models/project.py
@@ -2,13 +2,15 @@
from uuid import UUID, uuid4
import streamlit as st
-from sqlalchemy import Column, String, asc, func, text
+from sqlalchemy import Column, String, asc, func, select, text
from sqlalchemy.dialects import postgresql
from testgen.common.models import get_current_session
from testgen.common.models.connection import Connection
from testgen.common.models.custom_types import NullIfEmptyString
-from testgen.common.models.entity import Entity, EntityMinimal
+from testgen.common.models.entity import ENTITY_HASH_FUNCS, Entity, EntityMinimal
+from testgen.common.models.project_membership import ProjectMembership
+from testgen.common.models.user import User
@dataclass
@@ -24,6 +26,12 @@ class ProjectSummary(EntityMinimal):
can_export_to_observability: bool
+@dataclass
+class ProjectMember(EntityMinimal):
+ user: User
+ membership: ProjectMembership
+
+
class Project(Entity):
__tablename__ = "projects"
@@ -99,3 +107,22 @@ def cascade_delete(cls, ids: list[str]) -> bool:
def clear_cache(cls) -> bool:
super().clear_cache()
cls.get_summary.clear()
+ cls.get_project_members.clear()
+
+ @classmethod
+ @st.cache_data(show_spinner=False, hash_funcs=ENTITY_HASH_FUNCS)
+ def get_project_members(
+ cls,
+ project_code: str,
+ *filters,
+ _order_by: tuple = (asc(func.lower(User.username)),),
+ ) -> list[ProjectMember]:
+ """Get all users who have access to this project."""
+ query = (
+ select(User, ProjectMembership)
+ .join(ProjectMembership, User.id == ProjectMembership.user_id)
+ .where(ProjectMembership.project_code == project_code, *filters)
+ .order_by(*_order_by)
+ )
+ rows = get_current_session().execute(query).all()
+ return [ProjectMember(user=user, membership=membership) for user, membership in rows]
diff --git a/testgen/common/models/project_membership.py b/testgen/common/models/project_membership.py
new file mode 100644
index 00000000..94bcad5e
--- /dev/null
+++ b/testgen/common/models/project_membership.py
@@ -0,0 +1,82 @@
+from datetime import datetime
+from typing import Literal, Self
+from uuid import UUID, uuid4
+
+import streamlit as st
+from sqlalchemy import Column, ForeignKey, String, asc, select
+from sqlalchemy.dialects import postgresql
+
+from testgen.common.models import get_current_session
+from testgen.common.models.entity import Entity
+
+RoleType = Literal["admin", "data_quality", "analyst", "business", "catalog"]
+
+
+class ProjectMembership(Entity):
+ __tablename__ = "project_memberships"
+
+ id: UUID = Column(postgresql.UUID(as_uuid=True), primary_key=True, default=uuid4)
+ user_id: UUID = Column(
+ postgresql.UUID(as_uuid=True),
+ ForeignKey("auth_users.id", ondelete="CASCADE"),
+ nullable=False,
+ )
+ project_code: str = Column(
+ String,
+ ForeignKey("projects.project_code", ondelete="CASCADE"),
+ nullable=False,
+ )
+ role: "RoleType" = Column(String, nullable=False)
+ created_at: datetime = Column(postgresql.TIMESTAMP, default=datetime.utcnow)
+
+ _get_by = "id"
+ _default_order_by = (asc(project_code),)
+
+ @classmethod
+ @st.cache_data(show_spinner=False)
+ def get_by_user_and_project(cls, user_id: UUID, project_code: str) -> Self | None:
+ """Get a specific membership for a user in a project."""
+ query = select(cls).where(
+ cls.user_id == user_id,
+ cls.project_code == project_code,
+ )
+ return get_current_session().scalars(query).first()
+
+ @classmethod
+ @st.cache_data(show_spinner=False)
+ def get_projects_for_user(cls, user_id: UUID) -> list[str]:
+ """Get all project codes a user has access to."""
+ query = select(cls.project_code).where(cls.user_id == user_id)
+ return list(get_current_session().scalars(query).all())
+
+ @classmethod
+ @st.cache_data(show_spinner=False)
+ def get_memberships_for_user(cls, user_id: UUID) -> list[Self]:
+ """Get all memberships for a user."""
+ return list(cls.select_where(cls.user_id == user_id))
+
+ @classmethod
+ @st.cache_data(show_spinner=False)
+ def get_memberships_for_project(cls, project_code: str) -> list[Self]:
+ """Get all memberships for a project."""
+ return list(cls.select_where(cls.project_code == project_code))
+
+ @classmethod
+ def user_has_project_access(cls, user_id: UUID, project_code: str) -> bool:
+ """Check if a user has any access to a project."""
+ membership = cls.get_by_user_and_project(user_id, project_code)
+ return membership is not None
+
+ @classmethod
+ def get_user_role_in_project(cls, user_id: UUID, project_code: str) -> "RoleType | None":
+ """Get the user's role within a specific project."""
+ membership = cls.get_by_user_and_project(user_id, project_code)
+ return membership.role if membership else None
+
+ @classmethod
+ def clear_cache(cls) -> None:
+ super().clear_cache()
+ cls.get_by_user_and_project.clear()
+ cls.get_projects_for_user.clear()
+ cls.get_memberships_for_user.clear()
+ cls.get_memberships_for_project.clear()
diff --git a/testgen/common/models/scheduler.py b/testgen/common/models/scheduler.py
index fa070e03..7408501d 100644
--- a/testgen/common/models/scheduler.py
+++ b/testgen/common/models/scheduler.py
@@ -63,26 +63,12 @@ def select_where(cls, *clauses, order_by: str | InstrumentedAttribute | None = N
@classmethod
def delete(cls, job_id: str | UUID) -> None:
query = delete(cls).where(JobSchedule.id == job_id)
- db_session = get_current_session()
- try:
- db_session.execute(query)
- except ValueError:
- db_session.rollback()
- else:
- db_session.commit()
- cls.clear_cache()
+ get_current_session().execute(query)
@classmethod
def update_active(cls, job_id: str | UUID, active: bool) -> None:
query = update(cls).where(JobSchedule.id == job_id).values(active=active)
- db_session = get_current_session()
- try:
- db_session.execute(query)
- except ValueError:
- db_session.rollback()
- else:
- db_session.commit()
- cls.clear_cache()
+ get_current_session().execute(query)
@classmethod
def count(cls):
@@ -103,5 +89,3 @@ def cron_tz_str(self) -> str:
def save(self) -> None:
db_session = get_current_session()
db_session.add(self)
- db_session.commit()
- self.__class__.clear_cache()
diff --git a/testgen/common/models/scores.py b/testgen/common/models/scores.py
index 61c3ceb4..788ee00b 100644
--- a/testgen/common/models/scores.py
+++ b/testgen/common/models/scores.py
@@ -186,14 +186,12 @@ def save(self) -> None:
db_session = get_current_session()
db_session.add(self)
db_session.flush([self])
- db_session.commit()
db_session.refresh(self, ["id"])
def delete(self) -> None:
db_session = get_current_session()
db_session.add(self)
db_session.delete(self)
- db_session.commit()
def clear_results(self) -> None:
db_session = get_current_session()
diff --git a/testgen/common/models/settings.py b/testgen/common/models/settings.py
index 4d9d67c9..f98b1565 100644
--- a/testgen/common/models/settings.py
+++ b/testgen/common/models/settings.py
@@ -37,7 +37,7 @@ def set(cls, key: str, value: Any):
ps.value = value
else:
session.add(cls(key=key, value=value))
- session.commit()
+ session.flush()
def __repr__(self):
return f"{self.__class__.__name__}(key={self.key!r} value={self.value!r})"
diff --git a/testgen/common/models/table_group.py b/testgen/common/models/table_group.py
index 938a851b..724f1ba7 100644
--- a/testgen/common/models/table_group.py
+++ b/testgen/common/models/table_group.py
@@ -28,6 +28,9 @@ class TableGroupMinimal(EntityMinimal):
profile_use_sampling: bool
profiling_delay_days: str
monitor_test_suite_id: UUID | None
+ profile_flag_cdes: bool
+ profile_flag_pii: bool
+ profile_exclude_xde: bool
last_complete_profile_run_id: UUID | None
@@ -112,6 +115,8 @@ class TableGroup(Entity):
profile_sample_min_count: int = Column(BigInteger, default=100000)
profiling_delay_days: str = Column(String, default="0")
profile_flag_cdes: bool = Column(Boolean, default=True)
+ profile_flag_pii: bool = Column(Boolean, default=True)
+ profile_exclude_xde: bool = Column(Boolean, default=True)
profile_do_pair_rules: bool = Column(YNString, default="N")
profile_pair_rule_pct: int = Column(Integer, default=95)
include_in_dashboard: bool = Column(Boolean, default=True)
@@ -420,7 +425,6 @@ def cascade_delete(cls, ids: list[str]) -> None:
params = {"table_group_ids": tuple(ids)}
db_session = get_current_session()
db_session.execute(text(query), params)
- db_session.commit()
cls.delete_where(cls.id.in_(ids))
@classmethod
@@ -440,9 +444,7 @@ def save(self, add_scorecard_definition: bool = False) -> None:
query = update(TableGroup).where(TableGroup.id == self.id).values(**values)
db_session = get_current_session()
db_session.execute(query)
- db_session.commit()
else:
super().save()
if add_scorecard_definition:
ScoreDefinition.from_table_group(self).save()
- TableGroup.clear_cache()
diff --git a/testgen/common/models/test_definition.py b/testgen/common/models/test_definition.py
index c110695d..e9e2651c 100644
--- a/testgen/common/models/test_definition.py
+++ b/testgen/common/models/test_definition.py
@@ -1,17 +1,19 @@
from collections.abc import Iterable
from dataclasses import dataclass
from datetime import datetime
-from typing import Literal
-from uuid import UUID
+from typing import ClassVar, Literal
+from uuid import UUID, uuid4
import streamlit as st
from sqlalchemy import (
+ Boolean,
Column,
ForeignKey,
String,
Text,
TypeDecorator,
asc,
+ delete,
func,
insert,
select,
@@ -22,7 +24,7 @@
from sqlalchemy.orm import InstrumentedAttribute
from sqlalchemy.sql.expression import case, literal
-from testgen.common.models import get_current_session
+from testgen.common.models import Base, get_current_session
from testgen.common.models.custom_types import NullIfEmptyString, UpdateTimestamp, YNString, ZeroIfEmptyInteger
from testgen.common.models.entity import ENTITY_HASH_FUNCS, Entity, EntityMinimal
from testgen.utils import is_uuid4
@@ -41,6 +43,7 @@ class TestTypeSummary(EntityMinimal):
default_parm_columns: str
default_parm_prompts: str
default_parm_help: str
+ default_parm_required: str
default_severity: str
test_scope: TestScope
usage_notes: str
@@ -91,7 +94,8 @@ class TestDefinitionSummary(TestTypeSummary):
profiling_as_of_date: datetime
last_manual_update: datetime
export_to_observability: bool
- prediction: str | None
+ prediction: dict[str, dict[str, float]] | None
+ flagged: bool
@dataclass
@@ -123,6 +127,8 @@ def process_bind_param(self, value: str | None, _dialect) -> str | None:
class TestType(Entity):
__tablename__ = "test_types"
+ _get_by = "test_type"
+
id: str = Column(String)
test_type: str = Column(String, primary_key=True, nullable=False)
test_name_short: str = Column(String)
@@ -140,6 +146,7 @@ class TestType(Entity):
default_parm_values: str = Column(Text)
default_parm_prompts: str = Column(Text)
default_parm_help: str = Column(Text)
+ default_parm_required: str = Column(Text)
default_severity: str = Column(String)
run_type: TestRunType = Column(String)
test_scope: TestScope = Column(String)
@@ -149,6 +156,8 @@ class TestType(Entity):
usage_notes: str = Column(String)
active: str = Column(String)
+ # Unmapped columns: generation_template, result_visualization, result_visualization_params
+
_summary_columns = (
*[key for key in TestTypeSummary.__annotations__.keys() if key != "default_test_description"],
test_description.label("default_test_description"),
@@ -211,6 +220,7 @@ class TestDefinition(Entity):
last_manual_update: datetime = Column(UpdateTimestamp, nullable=False)
export_to_observability: bool = Column(YNString)
prediction: dict[str, dict[str, float]] | None = Column(postgresql.JSONB)
+ flagged: bool = Column(Boolean, default=False, nullable=False)
_default_order_by = (asc(func.lower(schema_name)), asc(func.lower(table_name)), asc(func.lower(column_name)), asc(test_type))
_summary_columns = (
@@ -276,10 +286,12 @@ def select_minimal_where(
)
return [TestDefinitionMinimal(**row) for row in results]
+ _yn_columns: ClassVar = {"test_active", "lock_refresh"}
+
@classmethod
def set_status_attribute(
cls,
- status_type: Literal["test_active", "lock_refresh"],
+ status_type: Literal["test_active", "lock_refresh", "flagged"],
test_definition_ids: list[str | UUID],
value: bool,
) -> None:
@@ -296,13 +308,11 @@ def set_status_attribute(
"""
params = {
"test_definition_ids": test_definition_ids,
- "value": YNString().process_bind_param(value, None),
+ "value": YNString().process_bind_param(value, None) if status_type in cls._yn_columns else value,
}
db_session = get_current_session()
db_session.execute(text(query), params)
- db_session.commit()
- cls.clear_cache()
@classmethod
def move(
@@ -318,7 +328,7 @@ def move(
SELECT UNNEST(ARRAY [:test_definition_ids]) AS id
)
UPDATE test_definitions
- SET
+ SET
{"table_name = :target_table_name," if target_table_name else ""}
{"column_name = :target_column_name," if target_column_name else ""}
table_groups_id = :target_table_group,
@@ -337,8 +347,6 @@ def move(
db_session = get_current_session()
db_session.execute(text(query), params)
- db_session.commit()
- cls.clear_cache()
@classmethod
def copy(
@@ -379,8 +387,6 @@ def copy(
)
db_session = get_current_session()
db_session.execute(query)
- db_session.commit()
- cls.clear_cache()
@classmethod
def clear_cache(cls) -> bool:
@@ -397,8 +403,69 @@ def save(self) -> None:
query = update(TestDefinition).where(TestDefinition.id == self.id).values(**values)
db_session = get_current_session()
db_session.execute(query)
- db_session.commit()
else:
super().save()
- TestDefinition.clear_cache()
+
+class TestDefinitionNote(Base):
+ __tablename__ = "test_definition_notes"
+
+ id: UUID = Column(postgresql.UUID(as_uuid=True), default=uuid4, primary_key=True)
+ test_definition_id: UUID = Column(
+ postgresql.UUID(as_uuid=True), ForeignKey("test_definitions.id", ondelete="CASCADE"), nullable=False
+ )
+ detail: str = Column(Text, nullable=False)
+ created_by: str = Column(String(100), nullable=False)
+ created_at: datetime = Column(postgresql.TIMESTAMP, server_default=text("CURRENT_TIMESTAMP"))
+ updated_at: datetime = Column(postgresql.TIMESTAMP)
+
+ @classmethod
+ def add_note(cls, test_definition_id: str | UUID, detail: str, username: str) -> None:
+ db_session = get_current_session()
+ db_session.execute(
+ insert(cls).values(test_definition_id=test_definition_id, detail=detail, created_by=username)
+ )
+
+ @classmethod
+ def update_note(cls, note_id: str | UUID, detail: str) -> None:
+ db_session = get_current_session()
+ db_session.execute(
+ update(cls).where(cls.id == note_id).values(detail=detail, updated_at=func.now())
+ )
+
+ @classmethod
+ def delete_note(cls, note_id: str | UUID) -> None:
+ db_session = get_current_session()
+ db_session.execute(delete(cls).where(cls.id == note_id))
+
+ @classmethod
+ def get_notes_count_by_ids(cls, test_definition_ids: list[str]) -> dict[str, int]:
+ """Returns {test_definition_id: count} for all given IDs."""
+ db_session = get_current_session()
+ rows = db_session.execute(
+ text("""
+ SELECT test_definition_id::VARCHAR, COUNT(*) as cnt
+ FROM test_definition_notes
+ WHERE test_definition_id = ANY(:ids)
+ GROUP BY test_definition_id
+ """),
+ {"ids": [UUID(td_id) for td_id in test_definition_ids]},
+ ).all()
+ return {str(row[0]): row[1] for row in rows}
+
+ @classmethod
+ def get_notes(cls, test_definition_id: str | UUID) -> list[dict]:
+ db_session = get_current_session()
+ results = db_session.execute(
+ select(cls).where(cls.test_definition_id == test_definition_id).order_by(cls.created_at.desc())
+ ).scalars().all()
+ return [
+ {
+ "id": str(note.id),
+ "detail": note.detail,
+ "created_by": note.created_by,
+ "created_at": note.created_at.isoformat() if note.created_at else None,
+ "updated_at": note.updated_at.isoformat() if note.updated_at else None,
+ }
+ for note in results
+ ]
diff --git a/testgen/common/models/test_result.py b/testgen/common/models/test_result.py
index dd8d9ded..8e517900 100644
--- a/testgen/common/models/test_result.py
+++ b/testgen/common/models/test_result.py
@@ -1,13 +1,16 @@
import enum
from collections import defaultdict
+from datetime import datetime
+from typing import Self
from uuid import UUID, uuid4
-from sqlalchemy import Boolean, Column, Enum, ForeignKey, Integer, String, or_, select
+from sqlalchemy import Boolean, Column, Enum, ForeignKey, Integer, String, desc, func, or_, select
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import aliased
from testgen.common.models import get_current_session
from testgen.common.models.entity import Entity
+from testgen.common.models.test_suite import TestSuite
class TestResultStatus(enum.Enum):
@@ -40,8 +43,93 @@ class TestResult(Entity):
status: TestResultStatus = Column("result_status", Enum(TestResultStatus))
message: str = Column("result_message", String)
+ test_time: datetime = Column(postgresql.TIMESTAMP)
result_code: int = Column(Integer)
- # Note: not all table columns are implemented by this entity
+ disposition: str = Column(String)
+ result_measure: str = Column(String)
+ threshold_value: str = Column(String)
+
+ # Unmapped columns: result_id, skip_errors, input_parameters, severity,
+ # result_signal, test_description, table_groups_id, dq_prevalence,
+ # dq_record_ct, observability_status
+
+ @classmethod
+ def select_results(
+ cls,
+ test_run_id: UUID,
+ status: TestResultStatus | None = None,
+ table_name: str | None = None,
+ test_type: str | None = None,
+ project_codes: list[str] | None = None,
+ limit: int = 50,
+ offset: int = 0,
+ ) -> list[Self]:
+ clauses = [
+ cls.test_run_id == test_run_id,
+ func.coalesce(cls.disposition, "Confirmed") == "Confirmed",
+ ]
+ if status:
+ clauses.append(cls.status == status)
+ if table_name:
+ clauses.append(cls.table_name == table_name)
+ if test_type:
+ clauses.append(cls.test_type == test_type)
+ query = select(cls).where(*clauses)
+ if project_codes is not None:
+ query = query.join(TestSuite, cls.test_suite_id == TestSuite.id).where(
+ TestSuite.project_code.in_(project_codes)
+ )
+ query = query.order_by(cls.status, cls.table_name, cls.column_names).offset(offset).limit(limit)
+ return get_current_session().scalars(query).all()
+
+ @classmethod
+ def select_failures(
+ cls,
+ test_run_id: UUID,
+ project_codes: list[str] | None = None,
+ group_by: str = "test_type",
+ ) -> list[tuple]:
+ allowed = {"test_type", "table_name", "column_names"}
+ if group_by not in allowed:
+ raise ValueError(f"group_by must be one of {allowed}")
+
+ where = [
+ cls.test_run_id == test_run_id,
+ cls.status.in_([TestResultStatus.Failed, TestResultStatus.Warning]),
+ func.coalesce(cls.disposition, "Confirmed") == "Confirmed",
+ ]
+
+ # Column grouping includes table_name for context → (table, column, count)
+ if group_by == "column_names":
+ group_cols = (cls.table_name, cls.column_names)
+ elif group_by == "test_type":
+ group_cols = (cls.test_type, cls.status)
+ else:
+ group_cols = (getattr(cls, group_by),)
+
+ query = select(*group_cols, func.count().label("failure_count")).where(*where)
+ if project_codes is not None:
+ query = query.join(TestSuite, cls.test_suite_id == TestSuite.id).where(
+ TestSuite.project_code.in_(project_codes)
+ )
+ query = query.group_by(*group_cols).order_by(func.count().desc())
+ return get_current_session().execute(query).all()
+
+ @classmethod
+ def select_history(
+ cls,
+ test_definition_id: UUID,
+ project_codes: list[str] | None = None,
+ limit: int = 20,
+ offset: int = 0,
+ ) -> list[Self]:
+ query = select(cls).where(cls.test_definition_id == test_definition_id)
+ if project_codes is not None:
+ query = query.join(TestSuite, cls.test_suite_id == TestSuite.id).where(
+ TestSuite.project_code.in_(project_codes)
+ )
+ query = query.order_by(desc(cls.test_time)).offset(offset).limit(limit)
+ return get_current_session().scalars(query).all()
@classmethod
def diff(cls, test_run_id_a: UUID, test_run_id_b: UUID) -> list[TestResultDiffType]:
diff --git a/testgen/common/models/test_run.py b/testgen/common/models/test_run.py
index 3709328a..1517bb4e 100644
--- a/testgen/common/models/test_run.py
+++ b/testgen/common/models/test_run.py
@@ -1,7 +1,7 @@
from collections.abc import Iterable
from dataclasses import dataclass
from datetime import UTC, datetime
-from typing import Literal, NamedTuple, Self, TypedDict
+from typing import ClassVar, Literal, NamedTuple, Self, TypedDict
from uuid import UUID, uuid4
import streamlit as st
@@ -49,6 +49,7 @@ class TestRunSummary(EntityMinimal):
test_endtime: datetime
table_groups_name: str
test_suite: str
+ project_code: str
project_name: str
status: TestRunStatus
progress: list[ProgressStep]
@@ -63,6 +64,15 @@ class TestRunSummary(EntityMinimal):
dismissed_ct: int
dq_score_testing: float
+ STATUS_LABEL: ClassVar[dict[str, str]] = {
+ "Complete": "Completed",
+ "Cancelled": "Canceled",
+ }
+
+ @property
+ def status_label(self) -> str:
+ return self.STATUS_LABEL.get(self.status, self.status)
+
@dataclass
class TestRunMonitorSummary(EntityMinimal):
@@ -238,6 +248,7 @@ def select_summary(
test_runs.test_endtime,
table_groups.table_groups_name,
test_suites.test_suite,
+ test_suites.project_code,
projects.project_name,
test_runs.status,
test_runs.progress,
@@ -337,8 +348,7 @@ def cancel_all_running(cls) -> list[UUID]:
)
db_session = get_current_session()
rows = db_session.execute(query)
- db_session.commit()
- cls.clear_cache()
+ db_session.flush()
return [r.id for r in rows]
@classmethod
@@ -346,8 +356,6 @@ def cancel_run(cls, run_id: str | UUID) -> None:
query = update(cls).where(cls.id == run_id).values(status="Cancelled", test_endtime=datetime.now(UTC))
db_session = get_current_session()
db_session.execute(query)
- db_session.commit()
- cls.clear_cache()
@classmethod
def cascade_delete(cls, ids: list[str]) -> None:
@@ -357,7 +365,6 @@ def cascade_delete(cls, ids: list[str]) -> None:
"""
db_session = get_current_session()
db_session.execute(text(query), {"test_run_ids": tuple(ids)})
- db_session.commit()
cls.delete_where(cls.id.in_(ids))
@classmethod
diff --git a/testgen/common/models/test_suite.py b/testgen/common/models/test_suite.py
index a8c35b8d..229094a6 100644
--- a/testgen/common/models/test_suite.py
+++ b/testgen/common/models/test_suite.py
@@ -101,7 +101,7 @@ def select_minimal_where(
@classmethod
@st.cache_data(show_spinner=False)
- def select_summary(cls, project_code: str, table_group_id: str | UUID | None = None) -> Iterable[TestSuiteSummary]:
+ def select_summary(cls, project_code: str, table_group_id: str | UUID | None = None, test_suite_name: str | None = None) -> Iterable[TestSuiteSummary]:
if table_group_id and not is_uuid4(table_group_id):
return []
@@ -199,9 +199,10 @@ def select_summary(cls, project_code: str, table_group_id: str | UUID | None = N
WHERE suites.is_monitor IS NOT TRUE
AND suites.project_code = :project_code
{"AND suites.table_groups_id = :table_group_id" if table_group_id else ""}
+ {"AND suites.test_suite ILIKE :test_suite_name" if test_suite_name else ""}
ORDER BY LOWER(suites.test_suite);
"""
- params = {"project_code": project_code, "table_group_id": table_group_id}
+ params = {"project_code": project_code, "table_group_id": table_group_id, "test_suite_name": f"%{test_suite_name}%" if test_suite_name else None}
db_session = get_current_session()
results = db_session.execute(text(query), params).mappings().all()
return [TestSuiteSummary(**row) for row in results]
@@ -246,7 +247,6 @@ def cascade_delete(cls, ids: list[str]) -> None:
"""
db_session = get_current_session()
db_session.execute(text(query), {"test_suite_ids": tuple(ids)})
- db_session.commit()
cls.delete_where(cls.id.in_(ids))
@classmethod
diff --git a/testgen/common/models/user.py b/testgen/common/models/user.py
index cc6f57c6..b4e1d575 100644
--- a/testgen/common/models/user.py
+++ b/testgen/common/models/user.py
@@ -1,16 +1,15 @@
from datetime import UTC, datetime
-from typing import Literal, Self
+from typing import Self
from uuid import UUID, uuid4
import streamlit as st
-from sqlalchemy import Column, String, asc, func, select, update
+from sqlalchemy import Boolean, Column, String, asc, func, select, update
from sqlalchemy.dialects import postgresql
from testgen.common.models import get_current_session
from testgen.common.models.custom_types import NullIfEmptyString
from testgen.common.models.entity import Entity
-
-RoleType = Literal["admin", "data_quality", "analyst", "business", "catalog"]
+from testgen.common.models.project_membership import RoleType
class User(Entity):
@@ -21,7 +20,7 @@ class User(Entity):
email: str = Column(NullIfEmptyString)
name: str = Column(NullIfEmptyString)
password: str = Column(String)
- role: RoleType = Column(String)
+ is_global_admin: bool = Column(Boolean, nullable=False, default=False)
latest_login: datetime = Column(postgresql.TIMESTAMP)
_get_by = "username"
@@ -37,8 +36,6 @@ def save(self, update_latest_login: bool = False) -> None:
query = update(User).where(User.id == self.id).values(**values)
db_session = get_current_session()
db_session.execute(query)
- db_session.commit()
- User.clear_cache()
else:
if update_latest_login:
self.latest_login = datetime.now(UTC)
@@ -49,3 +46,18 @@ def save(self, update_latest_login: bool = False) -> None:
def get(cls, identifier: str) -> Self | None:
query = select(cls).where(func.lower(User.username) == func.lower(identifier))
return get_current_session().scalars(query).first()
+
+ def get_accessible_projects(self) -> list[str]:
+ """Get all projects this user can access."""
+ from testgen.common.models.project_membership import ProjectMembership
+ return ProjectMembership.get_projects_for_user(self.id)
+
+ def get_role_in_project(self, project_code: str) -> RoleType | None:
+ """Get this user's role in a specific project."""
+ from testgen.common.models.project_membership import ProjectMembership
+ return ProjectMembership.get_user_role_in_project(self.id, project_code)
+
+ def has_project_access(self, project_code: str) -> bool:
+ """Check if user has access to a project."""
+ from testgen.common.models.project_membership import ProjectMembership
+ return ProjectMembership.user_has_project_access(self.id, project_code)
diff --git a/testgen/common/notifications/monitor_run.py b/testgen/common/notifications/monitor_run.py
index c3893aad..4a7153d5 100644
--- a/testgen/common/notifications/monitor_run.py
+++ b/testgen/common/notifications/monitor_run.py
@@ -64,9 +64,6 @@ def get_main_content_template(self):
border="0">
Anomalies Summary
-
- View on TestGen >
-
@@ -112,6 +109,11 @@ def get_main_content_template(self):
{{/if}}
+
+
+ View on TestGen >
+
+
"""
diff --git a/testgen/common/notifications/notifications.py b/testgen/common/notifications/notifications.py
index b4343e2e..68e20732 100644
--- a/testgen/common/notifications/notifications.py
+++ b/testgen/common/notifications/notifications.py
@@ -393,7 +393,7 @@ def get_body_template(self) -> str:
Table Group
@@ -71,6 +67,11 @@ def get_main_content_template(self):
Duration
{{format_duration profiling_run.start_time profiling_run.end_time}}
+
+
+ View results on TestGen >
+
+
{{#each hygiene_issues_summary}}
@@ -165,11 +168,6 @@ def get_result_table_template(self):
{{#if (eq priority 'High')}} text-red {{/if}}
{{#if (eq priority 'Moderate')}} text-orange {{/if}}
">{{label}}
-
-
- View {{format_number count.total}} {{label}} >
-
-
{{#if (len issues)}}
@@ -189,13 +187,18 @@ def get_result_table_template(self):
{{/each}}
-
-
+
+
+
+ View {{format_number count.total}} {{label}} >
+
+
+
{{#if truncated}}
+ {{truncated}} more
{{/if}}
-
+
●
indicates new issues
@@ -258,7 +261,13 @@ def send_profiling_run_notifications(profiling_run: ProfilingRun, result_list_ct
return
profiling_run_issues_url = "".join(
- (PersistedSetting.get("BASE_URL", ""), "/profiling-runs:hygiene?run_id=", str(profiling_run.id), "&source=email")
+ (
+ PersistedSetting.get("BASE_URL", ""),
+ "/profiling-runs:hygiene?project_code=",
+ str(profiling_run.project_code),
+ "&run_id=", str(profiling_run.id),
+ "&source=email"
+ )
)
hygiene_issues_summary = []
@@ -304,7 +313,14 @@ def send_profiling_run_notifications(profiling_run: ProfilingRun, result_list_ct
"id": str(profiling_run.id),
"issues_url": profiling_run_issues_url,
"results_url": "".join(
- (PersistedSetting.get("BASE_URL", ""), "/profiling-runs:results?run_id=", str(profiling_run.id), "&source=email")
+ (
+ PersistedSetting.get("BASE_URL", ""),
+ "/profiling-runs:results?project_code=",
+ str(profiling_run.project_code),
+ "&run_id=",
+ str(profiling_run.id),
+ "&source=email"
+ )
),
"start_time": profiling_run.profiling_starttime,
"end_time": profiling_run.profiling_endtime,
diff --git a/testgen/common/notifications/score_drop.py b/testgen/common/notifications/score_drop.py
index 1bf33d87..dbcaa498 100644
--- a/testgen/common/notifications/score_drop.py
+++ b/testgen/common/notifications/score_drop.py
@@ -45,9 +45,6 @@ def get_main_content_template(self):
Project
{{project_name}}
-
- View on TestGen >
-
Scorecard
@@ -62,6 +59,11 @@ def get_main_content_template(self):
{{/each}}
+
+
+ View on TestGen >
+
+
Results Summary
- {{#if (eq test_run.status 'Complete')}}
-
- View on TestGen >
-
- {{/if}}
@@ -144,6 +139,13 @@ def get_main_content_template(self):
{{test_run.log_message}}
{{/if}}
+ {{#if (eq test_run.status 'Complete')}}
+
+
+ View on TestGen >
+
+
+ {{/if}}
{{#each test_result_summary}}
@@ -167,11 +169,6 @@ def get_result_table_template(self):
{{#if (eq status 'Warning')}} text-orange {{/if}}
{{#if (eq status 'Error')}} text-brown {{/if}}
">{{label}}
-
-
- View {{format_number total}} {{label}} >
-
-
@@ -190,13 +187,18 @@ def get_result_table_template(self):
{{/each}}
-
-
+
+
+
+ View {{format_number total}} {{label}} >
+
+
+
{{#if truncated}}
+ {{truncated}} more
{{/if}}
-
+
●
indicates new {{label}}
@@ -324,9 +326,11 @@ def send_test_run_notifications(test_run: TestRun, result_list_ct=20, result_sta
test_run_url = "".join(
(
PersistedSetting.get("BASE_URL", ""),
- "/test-runs:results?run_id=",
+ "/test-runs:results?project_code=",
+ str(tr_summary.project_code),
+ "&run_id=",
str(test_run.id),
- "&source=email",
+ "&source=email"
)
)
diff --git a/testgen/common/pii_masking.py b/testgen/common/pii_masking.py
new file mode 100644
index 00000000..70b6a659
--- /dev/null
+++ b/testgen/common/pii_masking.py
@@ -0,0 +1,96 @@
+"""PII masking utilities for redacting sensitive data in the UI."""
+import pandas as pd
+
+from testgen.ui.services.database_service import fetch_all_from_db
+
+PII_REDACTED = "[PII Redacted]"
+
+PROFILING_PII_FIELDS = (
+ "top_freq_values", "min_text", "max_text",
+ "min_value", "min_value_over_0", "max_value",
+ "min_date", "max_date",
+)
+
+
+def get_pii_columns(table_group_id: str, schema: str | None = None, table_name: str | None = None) -> set[str]:
+ """Look up PII-flagged column names from data_column_chars."""
+
+ query = f"""
+ SELECT column_name
+ FROM data_column_chars
+ WHERE table_groups_id = :table_group_id
+ AND pii_flag IS NOT NULL
+ {"AND schema_name = :schema" if schema else ""}
+ {"AND table_name = :table_name" if table_name else ""}
+ """
+ params: dict = {
+ "table_group_id": table_group_id,
+ "schema": schema,
+ "table_name": table_name,
+ }
+
+ results = fetch_all_from_db(query, params)
+ return {row.column_name for row in results}
+
+
+def mask_source_data_pii(df: pd.DataFrame, pii_columns: set[str]) -> None:
+ """In-place mask values in PII columns with PII_REDACTED."""
+ if df.empty or not pii_columns:
+ return
+ for col in pii_columns:
+ # Match case-insensitively since column names may differ in case
+ for df_col in df.columns:
+ if df_col.lower() == col.lower():
+ df[df_col] = PII_REDACTED
+
+
+def mask_hygiene_detail(data: pd.DataFrame | list[dict], pii_columns: set[str] | None = None) -> None:
+ """Redact hygiene issue detail for PII columns where detail_redactable is true.
+
+ Accepts:
+ - DataFrame with detail_redactable, pii_flag, and detail columns (hygiene issues grid/export)
+ - List of issue dicts, each with detail_redactable and either pii_flag or column_name
+ (when pii_columns set is provided, matches column_name against it)
+ """
+ if isinstance(data, pd.DataFrame):
+ if data.empty or "detail_redactable" not in data.columns:
+ return
+ pii_mask = data["detail_redactable"].fillna(False) & data["pii_flag"].notna()
+ data.loc[pii_mask, "detail"] = PII_REDACTED
+ return
+
+ if not data:
+ return
+ pii_lower = {c.lower() for c in pii_columns} if pii_columns else None
+ for issue in data:
+ if not issue.get("detail_redactable"):
+ continue
+ if pii_lower is not None:
+ if issue.get("column_name", "").lower() in pii_lower:
+ issue["detail"] = PII_REDACTED
+ elif issue.get("pii_flag"):
+ issue["detail"] = PII_REDACTED
+
+
+def mask_profiling_pii(data: pd.DataFrame | dict, pii_columns: set[str]) -> None:
+ """Mask profiling fields for PII columns. Accepts a DataFrame or a single-row dict."""
+ if isinstance(data, dict):
+ if not pii_columns:
+ return
+ column_name = data.get("column_name")
+ if column_name and column_name.lower() not in {c.lower() for c in pii_columns}:
+ return
+ for field in PROFILING_PII_FIELDS:
+ if field in data:
+ data[field] = PII_REDACTED
+ return
+
+ if data.empty or not pii_columns:
+ return
+ pii_lower = {c.lower() for c in pii_columns}
+ mask = data["column_name"].str.lower().isin(pii_lower)
+ for field in PROFILING_PII_FIELDS:
+ if field in data.columns:
+ if data[field].dtype != object:
+ data[field] = data[field].astype(object)
+ data.loc[mask, field] = PII_REDACTED
diff --git a/testgen/mcp/__init__.py b/testgen/mcp/__init__.py
new file mode 100644
index 00000000..bf4de795
--- /dev/null
+++ b/testgen/mcp/__init__.py
@@ -0,0 +1,20 @@
+from urllib.parse import urlsplit
+
+from testgen import settings
+from testgen.common.models.settings import PersistedSetting
+
+
+def get_server_url() -> str:
+    """Derive the externally-reachable MCP server URL from the persisted BASE_URL.
+
+    Keeps BASE_URL's scheme and host but swaps in the MCP port. Falls back
+    to http://localhost when BASE_URL is unset or not a parseable URL.
+    """
+    base_url = PersistedSetting.get("BASE_URL", "")
+    if base_url:
+        parts = urlsplit(base_url)
+        if parts.scheme and parts.hostname:
+            # Re-bracket IPv6 literals, which urlsplit().hostname returns without brackets
+            host = f"[{parts.hostname}]" if ":" in parts.hostname else parts.hostname
+            return f"{parts.scheme}://{host}:{settings.MCP_PORT}"
+    return f"http://localhost:{settings.MCP_PORT}"
diff --git a/testgen/mcp/auth.py b/testgen/mcp/auth.py
new file mode 100644
index 00000000..71ce8b20
--- /dev/null
+++ b/testgen/mcp/auth.py
@@ -0,0 +1,29 @@
+from testgen.common.auth import check_permission, create_jwt_token, decode_jwt_token, verify_password
+from testgen.common.models.user import User
+
+__all__ = ["authenticate_user", "check_permission", "validate_token"]
+
+
+def authenticate_user(username: str, password: str) -> str:
+    """Verify credentials and return a JWT token."""
+    account = User.get(username)
+
+    # An unknown account and a bad password yield the same message, so the
+    # response does not reveal which usernames exist.
+    if account is None or not verify_password(password, account.password):
+        raise ValueError("Invalid username or password")
+
+    return create_jwt_token(account.username)
+
+
+def validate_token(token: str) -> User:
+    """Decode and validate a JWT token, returning the User."""
+    claims = decode_jwt_token(token)
+
+    if not (username := claims.get("username")):
+        raise ValueError("Token missing username")
+
+    if (user := User.get(username)) is None:
+        raise ValueError(f"User not found: {username}")
+
+    return user
diff --git a/testgen/mcp/exceptions.py b/testgen/mcp/exceptions.py
new file mode 100644
index 00000000..dc8d1444
--- /dev/null
+++ b/testgen/mcp/exceptions.py
@@ -0,0 +1,44 @@
+"""MCP exception hierarchy and error boundary.
+
+``MCPUserError`` (and its subclasses) carry safe, user-facing messages.
+``mcp_error_handler`` is a decorator that catches them and converts to
+text, while neutralising unexpected exceptions.
+"""
+
+import functools
+import logging
+
+LOG = logging.getLogger("testgen")
+
+
+class MCPUserError(Exception):
+    """Safe, user-facing error for MCP tools, prompts, and resources.
+
+    The error boundary converts ``str(e)`` into the response text.
+    All other exceptions are treated as unexpected: their traceback is
+    logged and a neutral message is returned to the client.
+    """
+
+
+class MCPPermissionDenied(MCPUserError):
+    """Raised when access is denied due to insufficient project permissions."""
+
+
+def mcp_error_handler(fn):
+    """Wrap an MCP handler (tool, resource, or prompt) with safe error handling.
+
+    - ``MCPUserError`` (including ``MCPPermissionDenied``) → ``str(e)`` as the response.
+    - Any other exception → traceback logged, neutral message returned.
+    """
+
+    @functools.wraps(fn)
+    def wrapper(*args, **kwargs):
+        try:
+            return fn(*args, **kwargs)
+        except MCPUserError as e:
+            return str(e)  # deliberate, user-safe message becomes the response body
+        except Exception:
+            LOG.exception("Unhandled error in MCP handler '%s'", fn.__name__)  # details stay server-side
+            return "An unexpected error occurred."
+
+    return wrapper
diff --git a/testgen/mcp/permissions.py b/testgen/mcp/permissions.py
new file mode 100644
index 00000000..47ac21da
--- /dev/null
+++ b/testgen/mcp/permissions.py
@@ -0,0 +1,127 @@
+"""MCP permission enforcement — project-level and role-based access filtering."""
+
+import contextvars
+import functools
+from collections.abc import Callable
+from dataclasses import dataclass
+
+from testgen.common.models.project_membership import ProjectMembership
+from testgen.common.models.user import User
+from testgen.mcp.exceptions import MCPPermissionDenied
+from testgen.utils.plugins import PluginHook
+
+_NOT_SET = object()
+
+_mcp_username: contextvars.ContextVar[str | None] = contextvars.ContextVar("mcp_username", default=None)
+_mcp_project_permissions: contextvars.ContextVar["ProjectPermissions | object"] = contextvars.ContextVar(
+ "mcp_project_permissions", default=_NOT_SET
+)
+
+
+@dataclass(frozen=True, slots=True)
+class ProjectPermissions:
+    memberships: dict[str, str]  # {project_code: role}
+    permission: str  # the permission @mcp_permission was declared with
+
+    def codes_allowed_to(self, permission: str) -> list[str]:
+        """Project codes where the user's role includes the given permission."""
+        allowed_roles = PluginHook.instance().rbac.get_roles_with_permission(permission)  # roles granting it, per plugin RBAC
+        return [code for code, role in self.memberships.items() if role in allowed_roles]
+
+    @property
+    def allowed_codes(self) -> list[str]:
+        """Project codes for the decorator's permission."""
+        return self.codes_allowed_to(self.permission)  # recomputed on every access (not cached)
+
+    def has_access(self, project_code: str) -> bool:
+        """For filtering lists — no exception, just a bool."""
+        return project_code in self.allowed_codes  # membership with an insufficient role counts as no access
+
+    def verify_access(self, project_code: str, not_found: str) -> None:
+        """Raise MCPPermissionDenied if user can't access this project.
+
+        - Has access: passes.
+        - Has membership but wrong role: raises with denial message.
+        - No membership: raises with not_found (hides project existence).
+        """
+        if project_code in self.allowed_codes:
+            return
+        if project_code in self.memberships:
+            raise MCPPermissionDenied(
+                "Your role on this project does not include the necessary permission for this operation."
+            )
+        raise MCPPermissionDenied(not_found)
+
+
+def set_mcp_username(username: str | None) -> None:
+ """Store the authenticated username (called by JWTTokenVerifier)."""
+ _mcp_username.set(username)
+
+
+def get_current_mcp_user() -> User:
+ """Get the authenticated User for the current MCP request.
+
+ Must be called within @with_database_session scope.
+ """
+ username = _mcp_username.get()
+ if not username:
+ raise RuntimeError("No authenticated user in MCP context")
+ user = User.get(username)
+ if user is None:
+ raise ValueError(f"Authenticated user not found: {username}")
+ return user
+
+
+def _compute_project_permissions(user: User, permission: str) -> ProjectPermissions:
+ """Build a ProjectPermissions for the given user and permission."""
+ memberships_list = ProjectMembership.get_memberships_for_user(user.id)
+ return ProjectPermissions(
+ memberships={m.project_code: m.role for m in memberships_list},
+ permission=permission,
+ )
+
+
+def get_project_permissions() -> "ProjectPermissions":
+ """Retrieve the ProjectPermissions computed by @mcp_permission for the current request.
+
+ Raises RuntimeError if called without @mcp_permission — prevents silent
+ unfiltered access when a developer forgets to add the decorator.
+ """
+ value = _mcp_project_permissions.get()
+ if value is _NOT_SET:
+ raise RuntimeError(
+ "get_project_permissions() called without @mcp_permission — add the decorator to this tool"
+ )
+ return value # type: ignore[return-value]
+
+
+def mcp_permission(permission: str) -> Callable:
+    """Decorator that enforces role-based project filtering for MCP tools.
+
+    Resolves the authenticated user, computes a ProjectPermissions for the given
+    permission, and stores it in a ContextVar. The tool retrieves the value
+    via ``get_project_permissions()``.
+
+    Raises ``MCPPermissionDenied`` if the user has no projects with the required
+    permission. Other ``MCPPermissionDenied`` exceptions from tool code propagate
+    through — the ``safe_tool`` error boundary handles conversion to text.
+    """
+
+    def decorator(fn: Callable) -> Callable:
+        @functools.wraps(fn)
+        def wrapper(*args, **kwargs):
+            user = get_current_mcp_user()  # identity stored by JWTTokenVerifier via set_mcp_username()
+            perms = _compute_project_permissions(user, permission)
+            if not perms.allowed_codes:
+                raise MCPPermissionDenied(
+                    "Your role does not include the necessary permission for this operation on any project."
+                )
+            tok = _mcp_project_permissions.set(perms)  # scoped to this call only
+            try:
+                return fn(*args, **kwargs)
+            finally:
+                _mcp_project_permissions.reset(tok)  # restore prior value even if the tool raises
+
+        return wrapper
+
+    return decorator
diff --git a/testgen/mcp/prompts/__init__.py b/testgen/mcp/prompts/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/testgen/mcp/prompts/workflows.py b/testgen/mcp/prompts/workflows.py
new file mode 100644
index 00000000..03ae15de
--- /dev/null
+++ b/testgen/mcp/prompts/workflows.py
@@ -0,0 +1,86 @@
+def health_check() -> str:
+ """Run a data quality health check across all projects and test suites.
+
+ Provides a comprehensive overview of the current data quality status.
+ """
+ return """\
+Please perform a data quality health check:
+
+1. Call `get_data_inventory()` to get a complete overview of all projects, connections, table groups, and test suites.
+2. For each project, call `get_recent_test_runs(...)` to get the latest test runs across all suites.
+3. Summarize the overall health:
+ - Which projects/suites are healthy (all tests passing)?
+ - Which have failures or warnings?
+ - Which have not been run recently?
+4. Highlight any critical issues that need immediate attention.
+5. Provide actionable recommendations for improving data quality.
+"""
+
+
+def investigate_failures(test_suite: str | None = None) -> str:
+ """Investigate test failures to identify root causes and patterns.
+
+ Args:
+ test_suite: Optional test suite name to focus the investigation on.
+ """
+ suite_filter = f" Focus on the test suite named `{test_suite}`." if test_suite else ""
+
+ return f"""\
+Please investigate test failures and identify root causes:{suite_filter}
+
+1. Call `get_data_inventory()` to understand the project structure.
+2. Call `get_recent_test_runs(...)` to find the latest run per suite{f" for suite `{test_suite}`" if test_suite else ""}.
+3. Call `get_failure_summary(test_run_id='...')` to see failures grouped by test type.
+4. For each failure category, call `get_test_type(test_type='...')` to understand what the test checks.
+5. Call `get_test_results(test_run_id='...', status='Failed')` to see individual failure details.
+6. Analyze the patterns:
+ - Are failures concentrated in specific tables or columns?
+ - Do certain test types fail consistently?
+ - What do the measured values vs thresholds tell us about the root cause?
+7. Provide a root cause analysis and recommended remediation steps.
+"""
+
+
+def table_health(table_name: str) -> str:
+ """Assess the data quality health of a specific table across all test suites.
+
+ Args:
+ table_name: The name of the table to investigate.
+ """
+ return f"""\
+Please assess the data quality health of table `{table_name}`:
+
+1. Call `get_data_inventory()` to discover all table groups.
+2. For each table group, call `list_tables(table_group_id='...')` to check if it contains `{table_name}`.
+3. For each relevant test suite, call `get_recent_test_runs(...)` to find the latest run.
+4. Call `get_test_results(test_run_id='...', table_name='{table_name}')` to get all results for this table.
+5. Summarize the table's health:
+ - Which tests pass and which fail?
+ - What data quality dimensions are affected?
+ - Are there patterns in the failures (e.g., specific columns)?
+6. Provide recommendations for improving data quality for this table.
+"""
+
+
+def compare_runs(test_suite: str | None = None) -> str:
+ """Compare the two most recent test runs to identify regressions and improvements.
+
+ Args:
+ test_suite: Optional test suite name to focus the comparison on.
+ """
+ suite_filter = f" for suite `{test_suite}`" if test_suite else ""
+
+ return f"""\
+Please compare the two most recent test runs{suite_filter} to identify regressions and improvements:
+
+1. Call `get_data_inventory()` to understand the project structure.
+2. Call `list_test_suites(project_code='...')` to find suites{suite_filter} and their latest runs.
+3. For the most recent completed run, call `get_test_results(test_run_id='...')` to get all results.
+4. For the previous run, call `get_test_results(test_run_id='...')` to get all results.
+5. Compare the two runs:
+ - **Regressions:** Tests that passed before but now fail.
+ - **Improvements:** Tests that failed before but now pass.
+ - **Persistent failures:** Tests that failed in both runs.
+ - **Stable passes:** Tests that passed in both runs.
+6. Summarize the trend and highlight any concerning regressions.
+"""
diff --git a/testgen/mcp/server.py b/testgen/mcp/server.py
new file mode 100644
index 00000000..34a2174b
--- /dev/null
+++ b/testgen/mcp/server.py
@@ -0,0 +1,156 @@
+import logging
+
+from mcp.server.auth.provider import AccessToken
+from mcp.server.auth.settings import AuthSettings
+from mcp.server.fastmcp import FastMCP
+
+from testgen import settings
+from testgen.common.auth import decode_jwt_token
+from testgen.common.models import with_database_session
+from testgen.mcp.permissions import set_mcp_username
+
+LOG = logging.getLogger("testgen")
+
+SERVER_INSTRUCTIONS = """\
+TestGen is a data quality platform that profiles databases, generates tests, and monitors tables.
+
+DATA MODEL
+
+Projects contain Connections (to target databases) and Table Groups (sets of tables to profile and test together).
+Table Groups contains Test Suites — collections of Test Definitions with configured thresholds.
+Test Runs execute a Test Suite and produce Test Results (one per Test Definition).
+Profiling Runs scan a Table Group and produce column-level statistics and detects data hygiene issues.
+Monitors track table health over time: freshness, volume, schema changes, and custom metrics.
+
+NAVIGATION
+
+Tools return entity IDs that feed into other tools. Start with get_data_inventory for broad discovery, then drill
+into specific entities.
+
+Test types have specific, non-obvious meanings (e.g., Alpha_Trunc). Do not guess what a test checks.
+ALWAYS look them up using either the `testgen://test-types` resource or the `get_test_type()` tool.
+
+CONVENTIONS
+- Identifiers are UUIDs passed as strings.
+- Dates are ISO 8601 format.
+"""
+
+
+class JWTTokenVerifier:
+    """Verify JWT Bearer tokens for MCP server authentication."""
+
+    async def verify_token(self, token: str) -> AccessToken | None:
+        try:
+            payload = decode_jwt_token(token)
+            set_mcp_username(payload["username"])  # stash identity in a ContextVar for @mcp_permission
+            return AccessToken(
+                token=token,
+                client_id=payload["username"],
+                scopes=[],
+                expires_at=int(payload["exp_date"]),
+            )
+        except (ValueError, KeyError):  # NOTE(review): assumes decode_jwt_token raises ValueError on bad/expired tokens — confirm
+            return None
+
+
+# Uvicorn log config: strip default handlers so logs propagate to the testgen logger.
+_UVICORN_LOG_CONFIG: dict = {
+ "version": 1,
+ "disable_existing_loggers": False,
+ "loggers": {
+ "uvicorn": {"handlers": [], "propagate": True},
+ "uvicorn.access": {"handlers": [], "propagate": True},
+ "uvicorn.error": {"handlers": [], "propagate": True},
+ },
+}
+
+
+def _configure_mcp_logging() -> None:
+ """Route FastMCP and uvicorn logs through the testgen logger."""
+ testgen_logger = logging.getLogger("testgen")
+
+ # FastMCP.__init__ calls basicConfig() which adds a RichHandler to the root logger — remove it
+ logging.getLogger().handlers.clear()
+
+ # Reparent top-level third-party loggers so they (and their children) propagate through testgen's handler
+ for name in ("mcp", "uvicorn"):
+ logging.getLogger(name).parent = testgen_logger
+
+
+def run_mcp() -> None:
+ """Start the MCP server with streamable HTTP transport."""
+ from testgen.mcp import get_server_url
+ from testgen.mcp.exceptions import mcp_error_handler
+ from testgen.mcp.prompts.workflows import compare_runs, health_check, investigate_failures, table_health
+ from testgen.mcp.tools.discovery import get_data_inventory, list_projects, list_tables, list_test_suites
+ from testgen.mcp.tools.reference import get_test_type, glossary_resource, test_types_resource
+ from testgen.mcp.tools.test_results import get_failure_summary, get_test_result_history, get_test_results
+ from testgen.mcp.tools.test_runs import get_recent_test_runs
+ from testgen.utils.plugins import discover
+
+ for plugin in discover():
+ plugin.load()
+
+ server_url = with_database_session(get_server_url)()
+
+ mcp = FastMCP(
+ "TestGen",
+ host=settings.MCP_HOST,
+ port=settings.MCP_PORT,
+ instructions=SERVER_INSTRUCTIONS,
+ auth=AuthSettings(
+ issuer_url=server_url,
+ resource_server_url=server_url,
+ ),
+ token_verifier=JWTTokenVerifier(),
+ )
+ _configure_mcp_logging()
+
+ def safe_tool(fn):
+ mcp.tool()(mcp_error_handler(fn))
+
+ def safe_resource(uri, fn):
+ mcp.resource(uri)(mcp_error_handler(fn))
+
+ def safe_prompt(fn):
+ mcp.prompt()(mcp_error_handler(fn))
+
+ # Tools (9)
+ safe_tool(get_data_inventory)
+ safe_tool(list_projects)
+ safe_tool(list_tables)
+ safe_tool(list_test_suites)
+ safe_tool(get_recent_test_runs)
+ safe_tool(get_test_results)
+ safe_tool(get_test_result_history)
+ safe_tool(get_failure_summary)
+ safe_tool(get_test_type)
+
+ # Resources (2)
+ safe_resource("testgen://test-types", test_types_resource)
+ safe_resource("testgen://glossary", glossary_resource)
+
+ # Prompts (4)
+ safe_prompt(health_check)
+ safe_prompt(investigate_failures)
+ safe_prompt(table_health)
+ safe_prompt(compare_runs)
+
+ LOG.info("Starting MCP server on %s:%s (auth issuer: %s)", settings.MCP_HOST, settings.MCP_PORT, server_url)
+
+ import uvicorn
+
+ app = mcp.streamable_http_app()
+
+ if settings.IS_DEBUG:
+ from starlette.middleware.cors import CORSMiddleware
+
+ app.add_middleware(
+ CORSMiddleware,
+ allow_origins=["*"],
+ allow_methods=["*"],
+ allow_headers=["*"],
+ expose_headers=["Mcp-Session-Id"],
+ )
+
+ uvicorn.run(app, host=settings.MCP_HOST, port=settings.MCP_PORT, log_config=_UVICORN_LOG_CONFIG)
diff --git a/testgen/mcp/services/__init__.py b/testgen/mcp/services/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/testgen/mcp/services/inventory_service.py b/testgen/mcp/services/inventory_service.py
new file mode 100644
index 00000000..55d40045
--- /dev/null
+++ b/testgen/mcp/services/inventory_service.py
@@ -0,0 +1,144 @@
+from sqlalchemy import and_, select
+
+from testgen.common.models import get_current_session
+from testgen.common.models.connection import Connection
+from testgen.common.models.project import Project
+from testgen.common.models.table_group import TableGroup
+from testgen.common.models.test_suite import TestSuite
+
+
+def get_inventory(
+    project_codes: list[str],
+    view_project_codes: list[str],
+) -> str:
+    """Build a markdown inventory of all projects, connections, table groups, and test suites.
+
+    Args:
+        project_codes: Projects the user can see (based on decorator permission).
+        view_project_codes: Projects where the user has 'view' permission.
+            Connection names and test suites are only shown for these projects.
+            Table groups are always shown so catalog users can browse tables.
+    """
+    session = get_current_session()
+
+    query = (
+        select(
+            Project.project_code,
+            Project.project_name,
+            Connection.connection_id,
+            Connection.connection_name,
+            TableGroup.id.label("table_group_id"),
+            TableGroup.table_groups_name,
+            TableGroup.table_group_schema,
+            TestSuite.id.label("test_suite_id"),
+            TestSuite.test_suite,
+        )
+        .outerjoin(Connection, Connection.project_code == Project.project_code)
+        .outerjoin(TableGroup, TableGroup.connection_id == Connection.connection_id)
+        .outerjoin(
+            TestSuite,
+            and_(
+                TestSuite.table_groups_id == TableGroup.id,
+                TestSuite.is_monitor.isnot(True),  # monitors are excluded from the suite listing
+            ),
+        )
+    )
+
+    query = query.where(Project.project_code.in_(project_codes))
+
+    query = query.order_by(
+        Project.project_name, Connection.connection_name, TableGroup.table_groups_name, TestSuite.test_suite,
+    )
+
+    rows = session.execute(query).all()
+
+    # Build nested structure
+    projects: dict[str, dict] = {}
+
+    for row in rows:
+        proj = projects.setdefault(row.project_code, {
+            "name": row.project_name,
+            "connections": {},
+        })
+        if row.connection_id is None:
+            continue
+
+        conn = proj["connections"].setdefault(row.connection_id, {
+            "name": row.connection_name,
+            "groups": {},
+        })
+        if row.table_group_id is None:
+            continue
+
+        group = conn["groups"].setdefault(row.table_group_id, {
+            "name": row.table_groups_name,
+            "schema": row.table_group_schema,
+            "suites": [],
+        })
+        if row.test_suite_id is not None:
+            group["suites"].append({
+                "id": str(row.test_suite_id),
+                "name": row.test_suite,
+            })
+
+    total_groups = sum(
+        len(conn["groups"])
+        for proj in projects.values()
+        for conn in proj["connections"].values()
+    )
+    compact_groups = total_groups > 50  # collapse groups to one-liners for very large inventories
+
+    view_codes_set = set(view_project_codes)
+
+    # Format as Markdown
+    lines = ["# Data Inventory\n"]
+
+    for project_code, proj in projects.items():
+        can_view = project_code in view_codes_set
+        lines.append(f"## Project: {proj['name']} (`{project_code}`)\n")
+
+        if not proj["connections"]:
+            if can_view:
+                lines.append("_No connections configured._\n")
+            else:
+                lines.append("_No table groups._\n")
+            continue
+
+        for _conn_id, conn in proj["connections"].items():
+            if can_view:
+                lines.append(f"### Connection: {conn['name']}\n")
+
+            if not conn["groups"]:
+                if can_view:
+                    lines.append("_No table groups._\n")
+                continue
+
+            for group_id, group in conn["groups"].items():
+                if compact_groups or not can_view:
+                    lines.append(
+                        f"- **{group['name']}**: id: `{group_id}`, schema: `{group['schema']}`, "
+                        f"test suites: {len(group['suites'])}"
+                    )
+                    continue
+
+                lines.append(
+                    f"#### Table Group: {group['name']} (id: `{group_id}`, schema: `{group['schema']}`)\n"
+                )
+
+                if not group["suites"]:
+                    lines.append("_No test suites._\n")
+                    continue
+
+                for suite in group["suites"]:
+                    lines.append(f"- **{suite['name']}** (id: `{suite['id']}`)")
+                lines.append("")
+
+        lines.append("")
+
+    lines.append(
+        "---\n"
+        "Use `list_tables(table_group_id='...')` to see tables in a group.\n"
+        "Use `list_test_suites(project_code='...')` for suite details and latest run stats."
+    )
+
+    return "\n".join(lines)
diff --git a/testgen/mcp/tools/__init__.py b/testgen/mcp/tools/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/testgen/mcp/tools/discovery.py b/testgen/mcp/tools/discovery.py
new file mode 100644
index 00000000..358f03fb
--- /dev/null
+++ b/testgen/mcp/tools/discovery.py
@@ -0,0 +1,137 @@
+from uuid import UUID
+
+from testgen.common.models import with_database_session
+from testgen.common.models.data_table import DataTable
+from testgen.common.models.project import Project
+from testgen.common.models.test_suite import TestSuite
+from testgen.mcp.exceptions import MCPUserError
+from testgen.mcp.permissions import get_project_permissions, mcp_permission
+
+
+@with_database_session
+@mcp_permission("catalog")
+def get_data_inventory() -> str:
+    """Get a structural inventory of all projects, connections, table groups, and test suites
+    accessible to the authenticated user.
+
+    This is the recommended starting point for understanding the data quality landscape.
+    Returns a structured markdown overview of the TestGen configuration.
+    """
+    from testgen.mcp.services.inventory_service import get_inventory
+
+    perms = get_project_permissions()
+    return get_inventory(
+        project_codes=perms.allowed_codes,
+        view_project_codes=perms.codes_allowed_to("view"),
+    )
+
+
+@with_database_session
+@mcp_permission("catalog")
+def list_projects() -> str:
+    """List all projects the authenticated user has access to.
+
+    Returns project codes and names. Use these to scope queries to specific projects.
+    """
+    perms = get_project_permissions()
+    projects = [p for p in Project.select_where() if perms.has_access(p.project_code)]
+
+    if not projects:
+        return "No projects found."
+
+    lines = ["# Projects\n"]
+    for project in projects:
+        lines.append(f"- **{project.project_name}** (`{project.project_code}`)")
+
+    return "\n".join(lines)
+
+
+@with_database_session
+@mcp_permission("view")
+def list_test_suites(project_code: str) -> str:
+    """List all test suites for a project with their latest run statistics.
+
+    Args:
+        project_code: The project code to list test suites for.
+    """
+    if not project_code:
+        return "Missing required parameter `project_code`."
+
+    perms = get_project_permissions()
+    perms.verify_access(project_code, not_found=f"No test suites found for project `{project_code}`.")
+
+    summaries = TestSuite.select_summary(project_code)
+
+    if not summaries:
+        return f"No test suites found for project `{project_code}`."
+
+    lines = [f"# Test Suites for `{project_code}`\n"]
+    for s in summaries:
+        lines.append(f"## {s.test_suite} (id: `{s.id}`)")
+        lines.append(f"- Connection: {s.connection_name}")
+        lines.append(f"- Table Group: {s.table_groups_name}")
+        if s.test_suite_description:
+            lines.append(f"- Description: {s.test_suite_description}")
+        lines.append(f"- Test definitions: {s.test_ct or 0}")
+
+        if s.latest_run_id:
+            lines.append(f"- Latest run: `{s.latest_run_id}` ({s.latest_run_start})")
+            lines.append(
+                f" - {s.last_run_test_ct or 0} tests: "
+                f"{s.last_run_passed_ct or 0} passed, "
+                f"{s.last_run_failed_ct or 0} failed, "
+                f"{s.last_run_warning_ct or 0} warnings, "
+                f"{s.last_run_error_ct or 0} errors"
+            )
+            if s.last_run_dismissed_ct:
+                lines.append(f" - {s.last_run_dismissed_ct} dismissed")
+        else:
+            lines.append("- _No completed runs._")
+        lines.append("")
+
+    return "\n".join(lines)
+
+
+@with_database_session
+@mcp_permission("catalog")
+def list_tables(table_group_id: str, limit: int = 200, page: int = 1) -> str:
+    """List tables in a table group.
+
+    Args:
+        table_group_id: The table group UUID.
+        limit: Maximum number of tables per page (default 200).
+        page: Page number, starting from 1 (default 1).
+    """
+    try:
+        group_uuid = UUID(table_group_id)
+    except (ValueError, AttributeError) as err:
+        raise MCPUserError(f"Invalid table_group_id: `{table_group_id}` is not a valid UUID.") from err
+
+    # Guard pagination inputs: page < 1 would produce a negative SQL offset and
+    # limit < 1 a zero division when computing total_pages below.
+    if limit < 1 or page < 1:
+        raise MCPUserError("`limit` and `page` must be positive integers.")
+
+    perms = get_project_permissions()
+    project_codes = perms.allowed_codes
+
+    offset = (page - 1) * limit
+    table_names = DataTable.select_table_names(group_uuid, limit=limit, offset=offset, project_codes=project_codes)
+    total = DataTable.count_tables(group_uuid, project_codes=project_codes)
+
+    if not table_names:
+        if page > 1:
+            return f"No tables on page {page} (total: {total})."
+        return f"No tables found for table group `{table_group_id}`."
+
+    lines = [f"# Tables in Table Group `{table_group_id}`\n"]
+    lines.append(f"Total tables: {total}. Showing {len(table_names)} (page {page}).\n")
+
+    for name in table_names:
+        lines.append(f"- `{name}`")
+
+    total_pages = (total + limit - 1) // limit
+    if page < total_pages:
+        lines.append(f"\n_Page {page} of {total_pages}. Use `page={page + 1}` for more._")
+
+    return "\n".join(lines)
diff --git a/testgen/mcp/tools/reference.py b/testgen/mcp/tools/reference.py
new file mode 100644
index 00000000..9887effa
--- /dev/null
+++ b/testgen/mcp/tools/reference.py
@@ -0,0 +1,112 @@
+from testgen.common.models import with_database_session
+from testgen.common.models.test_definition import TestType
+
+
+@with_database_session
+def get_test_type(test_type: str) -> str:
+ """Get detailed information about a specific test type.
+
+ Args:
+ test_type: The test type (e.g., 'Alpha Truncation', 'Unique Percent').
+ """
+ matches = TestType.select_where(TestType.test_name_short == test_type)
+ tt = matches[0] if matches else None
+
+ if not tt:
+ return f"Test type `{test_type}` not found. Use `testgen://test-types` to see available types."
+
+ lines = [
+ f"# {tt.test_name_short}\n",
+ ]
+ if tt.test_name_long:
+ lines.append(f"- **Full Name:** {tt.test_name_long}")
+ if tt.test_description:
+ lines.append(f"- **Description:** {tt.test_description}")
+ if tt.measure_uom:
+ lines.append(f"- **Unit of Measure:** {tt.measure_uom}")
+ if tt.measure_uom_description:
+ lines.append(f"- **Measure Description:** {tt.measure_uom_description}")
+ if tt.threshold_description:
+ lines.append(f"- **Threshold:** {tt.threshold_description}")
+ if tt.dq_dimension:
+ lines.append(f"- **Quality Dimension:** {tt.dq_dimension}")
+ if tt.test_scope:
+ lines.append(f"- **Scope:** {tt.test_scope}")
+ if tt.except_message:
+ lines.append(f"- **Exception Message:** {tt.except_message}")
+ if tt.usage_notes:
+ lines.append(f"- **Usage Notes:** {tt.usage_notes}")
+
+ return "\n".join(lines)
+
+
+@with_database_session
+def test_types_resource() -> str:
+ """Reference table of all test types with their descriptions and data quality dimensions."""
+ test_types = TestType.select_where(TestType.active == "Y")
+
+ if not test_types:
+ return "No test types found."
+
+ lines = [
+ "# TestGen Test Types Reference\n",
+ "| Test Type | Quality Dimension | Scope | Description |",
+ "|---|---|---|---|",
+ ]
+
+ for tt in test_types:
+ desc = tt.test_description or ""
+ lines.append(
+ f"| {tt.test_name_short or ''} | "
+ f"{tt.dq_dimension or ''} | {tt.test_scope or ''} | {desc} |"
+ )
+
+ return "\n".join(lines)
+
+
+def glossary_resource() -> str:
+ """Glossary of TestGen concepts, entity hierarchy, result statuses, and quality dimensions."""
+ return """\
+# TestGen Glossary
+
+## Entity Hierarchy
+
+- **Project** — Top-level organizational unit.
+- **Connection** — Database connection configuration (host, credentials).
+- **Table Group** — A set of tables within a schema that are profiled and tested together.
+- **Test Suite** — A collection of test definitions scoped to a table group.
+- **Test Definition** — A configured test with parameters, thresholds, and target table/column.
+- **Test Run** — An execution of a test suite producing test results.
+- **Test Result** — The outcome of a single test definition within a test run.
+
+## Test Result Statuses
+
+- **Passed** — Data meets test criteria.
+- **Warning** — Data does not meet test criteria. Severity configured as Warning.
+- **Failed** — Data does not meet test criteria. Severity configured as Fail.
+- **Error** — Test could not execute (e.g., missing table or permission issue).
+- **Log** — Informational result recorded for reference.
+
+## Disposition
+
+Disposition is a user-assigned review status for test results:
+- **Confirmed** (default) — Result is valid and counts toward scoring.
+- **Dismissed** — Result reviewed and dismissed (excluded from scoring).
+- **Muted** — Test was deactivated after this result (excluded from scoring).
+
+## Data Quality Dimensions
+
+- **Accuracy** — Data values are correct and reflect real-world truth.
+- **Completeness** — Required data is present (no unexpected NULLs or blanks).
+- **Consistency** — Data agrees across columns, tables, or systems.
+- **Timeliness** — Data is current and updated within expected windows.
+- **Uniqueness** — No unintended duplicates exist.
+- **Validity** — Data conforms to expected formats, ranges, or patterns.
+
+## Test Scopes
+
+- **column** — Tests a single column (e.g., null rate, pattern match).
+- **table** — Tests table-level properties (e.g., row count, freshness).
+- **referential** — Tests relationships between tables (e.g., foreign key match).
+- **custom** — User-defined SQL tests.
+"""
diff --git a/testgen/mcp/tools/test_results.py b/testgen/mcp/tools/test_results.py
new file mode 100644
index 00000000..c76f2e5d
--- /dev/null
+++ b/testgen/mcp/tools/test_results.py
@@ -0,0 +1,223 @@
+from uuid import UUID
+
+from testgen.common.models import with_database_session
+from testgen.common.models.test_definition import TestType
+from testgen.common.models.test_result import TestResult, TestResultStatus
+from testgen.mcp.exceptions import MCPUserError
+from testgen.mcp.permissions import get_project_permissions, mcp_permission
+
+
+def _parse_uuid(value: str, label: str = "ID") -> UUID:
+ try:
+ return UUID(value)
+ except (ValueError, AttributeError) as err:
+ raise MCPUserError(f"Invalid {label}: `{value}` is not a valid UUID.") from err
+
+
+def _parse_status(value: str) -> TestResultStatus:
+ try:
+ return TestResultStatus(value)
+ except ValueError as err:
+ valid = ", ".join(s.value for s in TestResultStatus)
+ raise MCPUserError(f"Invalid status `{value}`. Valid values: {valid}") from err
+
+
+def _resolve_test_type(short_name: str) -> str:
+ """Resolve a test type short name to its internal code."""
+ matches = TestType.select_where(TestType.test_name_short == short_name)
+ if not matches:
+ raise MCPUserError(f"Unknown test type: `{short_name}`. Use the testgen://test-types resource to see available types.")
+ return matches[0].test_type
+
+
+@with_database_session
+@mcp_permission("view")
+def get_test_results(
+ test_run_id: str,
+ status: str | None = None,
+ table_name: str | None = None,
+ test_type: str | None = None,
+ limit: int = 50,
+ page: int = 1,
+) -> str:
+ """Get individual test results for a test run, with optional filters.
+
+ Args:
+ test_run_id: The UUID of the test run.
+ status: Filter by result status (Passed, Failed, Warning, Error, Log).
+ table_name: Filter by table name.
+ test_type: Filter by test type (e.g. 'Alpha Truncation', 'Unique Percent').
+ limit: Maximum number of results per page (default 50).
+ page: Page number, starting from 1 (default 1).
+ """
+ run_uuid = _parse_uuid(test_run_id, "test_run_id")
+ status_enum = _parse_status(status) if status else None
+ offset = (page - 1) * limit
+
+ test_type_code = _resolve_test_type(test_type) if test_type else None
+
+ perms = get_project_permissions()
+
+ results = TestResult.select_results(
+ test_run_id=run_uuid,
+ status=status_enum,
+ table_name=table_name,
+ test_type=test_type_code,
+ limit=limit,
+ offset=offset,
+ project_codes=perms.allowed_codes,
+ )
+
+ if not results:
+ filters = []
+ if status:
+ filters.append(f"status={status}")
+ if table_name:
+ filters.append(f"table={table_name}")
+ if test_type:
+ filters.append(f"type={test_type}")
+ filter_str = f" (filters: {', '.join(filters)})" if filters else ""
+ return f"No test results found for run `{test_run_id}`{filter_str}."
+
+ type_names = {tt.test_type: tt.test_name_short for tt in TestType.select_where(TestType.active == "Y")}
+
+ lines = [f"# Test Results for run `{test_run_id}`\n"]
+ lines.append(f"Showing {len(results)} result(s) (page {page}).\n")
+
+ for r in results:
+ status_str = r.status.value if r.status else "Unknown"
+ test_name = type_names.get(r.test_type, r.test_type)
+ if r.column_names:
+ title = f"## [{status_str}] {test_name} on `{r.column_names}` in `{r.table_name}`"
+ else:
+ title = f"## [{status_str}] {test_name} on `{r.table_name}`"
+ lines.append(title)
+ lines.append(f"- Test definition: `{r.test_definition_id}`")
+ if r.column_names:
+ lines.append(f"- Column: `{r.column_names}`")
+ if r.result_measure is not None:
+ lines.append(f"- Measured value: {r.result_measure}")
+ if r.threshold_value is not None:
+ lines.append(f"- Threshold: {r.threshold_value}")
+ if r.message:
+ lines.append(f"- Message: {r.message}")
+ lines.append("")
+
+ return "\n".join(lines)
+
+
+@with_database_session
+@mcp_permission("view")
+def get_failure_summary(test_run_id: str, group_by: str = "test_type") -> str:
+ """Get a summary of test failures (Failed and Warning) grouped by test type, table name, or column.
+
+ Args:
+ test_run_id: The UUID of the test run.
+ group_by: Group failures by 'test_type', 'table', or 'column' (default: 'test_type').
+ """
+ run_uuid = _parse_uuid(test_run_id, "test_run_id")
+
+ perms = get_project_permissions()
+
+ # Map public param names to model field names
+ model_group_map = {"table": "table_name", "column": "column_names"}
+ model_group_by = model_group_map.get(group_by, group_by)
+ failures = TestResult.select_failures(test_run_id=run_uuid, group_by=model_group_by, project_codes=perms.allowed_codes)
+
+ if not failures:
+ return f"No confirmed failures found for run `{test_run_id}`."
+
+ total = sum(row[-1] for row in failures)
+
+ if group_by == "test_type":
+ type_names = {tt.test_type: tt.test_name_short for tt in TestType.select_where(TestType.active == "Y")}
+
+ lines = [
+ f"# Failure Summary for run `{test_run_id}`\n",
+ f"**Total confirmed failures (Failed + Warning):** {total}\n",
+ ]
+
+ if group_by == "test_type":
+ lines.append("| Test Type | Severity | Count |")
+ lines.append("|---|---|---|")
+ else:
+ group_label = {"table": "Table Name", "column": "Column"}[group_by]
+ lines.append(f"| {group_label} | Count |")
+ lines.append("|---|---|")
+
+ for row in failures:
+ count = row[-1]
+ if group_by == "column":
+ # Row is (table_name, column_names, count)
+ table, column = row[0], row[1]
+ label = f"`{column}` in `{table}`" if column else f"`{table}` (table-level)"
+ lines.append(f"| {label} | {count} |")
+ elif group_by == "test_type":
+ # Row is (test_type, status, count)
+ code = row[0]
+ status = row[1]
+ name = type_names.get(code, code)
+ severity = status.value if status else "Unknown"
+ lines.append(f"| {name} | {severity} | {count} |")
+ else:
+ lines.append(f"| `{row[0]}` | {count} |")
+
+ if group_by == "test_type":
+ lines.append(
+ "\nCheck `testgen://test-types` to understand what each test type checks "
+ "and `get_test_type(test_type='...')` to fetch more details."
+ )
+
+ return "\n".join(lines)
+
+
+@with_database_session
+@mcp_permission("view")
+def get_test_result_history(
+ test_definition_id: str,
+ limit: int = 20,
+ page: int = 1,
+) -> str:
+ """Get the historical results of a specific test definition across runs, showing how measure and status changed over time.
+
+ Args:
+ test_definition_id: The UUID of the test definition (from get_test_results output).
+ limit: Maximum number of historical results per page (default 20).
+ page: Page number, starting from 1 (default 1).
+ """
+ def_uuid = _parse_uuid(test_definition_id, "test_definition_id")
+ offset = (page - 1) * limit
+
+ perms = get_project_permissions()
+
+ results = TestResult.select_history(test_definition_id=def_uuid, limit=limit, offset=offset, project_codes=perms.allowed_codes)
+
+ if not results:
+ return f"No historical results found for test definition `{test_definition_id}`."
+
+ type_names = {tt.test_type: tt.test_name_short for tt in TestType.select_where(TestType.active == "Y")}
+
+ first = results[0]
+ test_name = type_names.get(first.test_type, first.test_type)
+ lines = [
+ "# Test Result History\n",
+ f"- **Test Type:** {test_name}",
+ f"- **Table:** `{first.table_name}`",
+ ]
+ if first.column_names:
+ lines.append(f"- **Column:** `{first.column_names}`")
+
+ lines.extend([
+ f"\nShowing {len(results)} result(s), newest first (page {page}).\n",
+ "| Date | Measure | Threshold | Status |",
+ "|---|---|---|---|",
+ ])
+
+ for r in results:
+ date_str = str(r.test_time) if r.test_time else "—"
+ measure = r.result_measure if r.result_measure is not None else "—"
+ threshold = r.threshold_value if r.threshold_value is not None else "—"
+ status_str = r.status.value if r.status else "—"
+ lines.append(f"| {date_str} | {measure} | {threshold} | {status_str} |")
+
+ return "\n".join(lines)
diff --git a/testgen/mcp/tools/test_runs.py b/testgen/mcp/tools/test_runs.py
new file mode 100644
index 00000000..26053832
--- /dev/null
+++ b/testgen/mcp/tools/test_runs.py
@@ -0,0 +1,79 @@
+from testgen.common.models import with_database_session
+from testgen.common.models.test_run import TestRun
+from testgen.common.models.test_suite import TestSuite
+from testgen.mcp.permissions import get_project_permissions, mcp_permission
+
+
+@with_database_session
+@mcp_permission("view")
+def get_recent_test_runs(project_code: str, test_suite: str | None = None, limit: int = 1) -> str:
+ """Get the latest test runs for each test suite in a project, optionally filtered by test suite name.
+
+ Args:
+ project_code: The project code to query.
+ test_suite: Optional test suite name to filter by.
+ limit: Maximum runs per test suite (default 1).
+ """
+ if not project_code:
+ return "Missing required parameter `project_code`."
+
+ perms = get_project_permissions()
+ perms.verify_access(project_code, not_found=f"No completed test runs found in project `{project_code}`.")
+
+ test_suite_id = None
+ if test_suite:
+ suites = TestSuite.select_minimal_where(
+ TestSuite.project_code == project_code,
+ TestSuite.test_suite == test_suite,
+ )
+ if not suites:
+ return f"Test suite `{test_suite}` not found in project `{project_code}`."
+ test_suite_id = str(suites[0].id)
+
+ summaries = TestRun.select_summary(project_code=project_code, test_suite_id=test_suite_id)
+
+ if not summaries:
+ scope = f" for suite `{test_suite}`" if test_suite else ""
+ return f"No completed test runs found in project `{project_code}`{scope}."
+
+ # Take the first `limit` runs per suite (summaries are ordered by test_starttime DESC)
+ seen: dict[str, int] = {}
+ runs = []
+ for s in summaries:
+ count = seen.get(s.test_suite, 0)
+ if count < limit:
+ runs.append(s)
+ seen[s.test_suite] = count + 1
+
+ lines = [f"# Recent Test Runs for `{project_code}`\n"]
+ if test_suite:
+ lines[0] = f"# Recent Test Runs for `{project_code}` / `{test_suite}`\n"
+ lines.append(f"Showing {len(runs)} run(s) ({limit} per suite).\n")
+
+ current_suite = None
+ for run in runs:
+ if run.test_suite != current_suite:
+ current_suite = run.test_suite
+ lines.append(f"## {current_suite}\n")
+
+ passed = run.passed_ct or 0
+ failed = run.failed_ct or 0
+ warning = run.warning_ct or 0
+ errors = run.error_ct or 0
+
+ lines.append(f"### {run.test_starttime} — {run.status_label}")
+ lines.append(f"- **Run ID:** `{run.test_run_id}`")
+ lines.append(f"- **Started:** {run.test_starttime} | **Ended:** {run.test_endtime}")
+ lines.append(f"- **Results:** {run.test_ct or 0} tests — {passed} passed, {failed} failed, {warning} warnings, {errors} errors")
+
+ if run.dismissed_ct:
+ lines.append(f"- **Dismissed:** {run.dismissed_ct}")
+
+ if run.dq_score_testing is not None:
+ lines.append(f"- **Testing Score:** {run.dq_score_testing:.1f}")
+
+ lines.append("")
+
+ lines.append("Use `get_test_results(test_run_id='...')` for detailed results of a specific run.")
+
+ return "\n".join(lines)
diff --git a/testgen/scheduler/base.py b/testgen/scheduler/base.py
index 0c0e5bc6..ef88be83 100644
--- a/testgen/scheduler/base.py
+++ b/testgen/scheduler/base.py
@@ -122,7 +122,8 @@ def _run(self):
try:
triggering_time, jobs = next(next_jobs)
except StopIteration:
- self._reload_event.wait()
+ if not self._stopping.is_set():
+ self._reload_event.wait()
break
if self._wait_until(triggering_time):
diff --git a/testgen/settings.py b/testgen/settings.py
index cf71768d..8d2b4512 100644
--- a/testgen/settings.py
+++ b/testgen/settings.py
@@ -1,13 +1,13 @@
import os
import typing
-IS_DEBUG_LOG_LEVEL: bool = os.getenv("TESTGEN_DEBUG_LOG_LEVEL", "no").lower() == "yes"
+IS_DEBUG_LOG_LEVEL: bool = os.getenv("TESTGEN_DEBUG_LOG_LEVEL", "no").lower() in ("yes", "true")
"""
When set, logs will be at debug level.
defaults to: `no`
"""
-IS_DEBUG: bool = os.getenv("TESTGEN_DEBUG", "no").lower() == "yes"
+IS_DEBUG: bool = os.getenv("TESTGEN_DEBUG", "no").lower() in ("yes", "true")
"""
When True invalidates the cache with the bootstrapped application
causing the changes to the routing and plugins to take effect on every
@@ -17,7 +17,7 @@
defaults to: `True`
"""
-LOG_TO_FILE: bool = os.getenv("TESTGEN_LOG_TO_FILE", "yes").lower() == "yes"
+LOG_TO_FILE: bool = os.getenv("TESTGEN_LOG_TO_FILE", "yes").lower() in ("yes", "true")
"""
When set, rotating file logs will be generated.
defaults to: `True`
@@ -266,7 +266,7 @@
defaults to: `environ[DATABASE_PORT]`
"""
-SKIP_DATABASE_CERTIFICATE_VERIFICATION: bool = os.getenv("TG_TARGET_DB_TRUST_SERVER_CERTIFICATE", "no").lower() == "yes"
+SKIP_DATABASE_CERTIFICATE_VERIFICATION: bool = os.getenv("TG_TARGET_DB_TRUST_SERVER_CERTIFICATE", "no").lower() in ("yes", "true")
"""
When True for supported SQL flavors, set up the SQLAlchemy connection to
trust the database server certificate.
@@ -372,7 +372,7 @@
from env variable: `OBSERVABILITY_API_KEY`
"""
-OBSERVABILITY_VERIFY_SSL: bool = os.getenv("TG_EXPORT_TO_OBSERVABILITY_VERIFY_SSL", "yes").lower() in ["yes", "true"]
+OBSERVABILITY_VERIFY_SSL: bool = os.getenv("TG_EXPORT_TO_OBSERVABILITY_VERIFY_SSL", "yes").lower() in ("yes", "true")
"""
When False, exporting events to your instance of Observability will skip
SSL verification.
@@ -456,7 +456,7 @@
Random ID that uniquely identifies the instance.
"""
-ANALYTICS_ENABLED: bool = os.getenv("TG_ANALYTICS", "yes").lower() in ("true", "yes")
+ANALYTICS_ENABLED: bool = os.getenv("TG_ANALYTICS", "yes").lower() in ("yes", "true")
"""
Disables sending usage data when set to any value except "true" and "yes". Defaults to "yes"
"""
@@ -500,3 +500,27 @@
"""
Email: SMTP password
"""
+
+MCP_PORT: int = int(os.getenv("TG_MCP_PORT", "8510"))
+"""
+Port for the MCP server.
+
+from env variable: `TG_MCP_PORT`
+defaults to: `8510`
+"""
+
+MCP_HOST: str = os.getenv("TG_MCP_HOST", "0.0.0.0") # noqa: S104
+"""
+Host for the MCP server.
+
+from env variable: `TG_MCP_HOST`
+defaults to: `0.0.0.0`
+"""
+
+MCP_ENABLED: bool = os.getenv("TG_MCP_ENABLED", "no").lower() in ("yes", "true")
+"""
+Enable the MCP server when running `testgen run-app all`.
+
+from env variable: `TG_MCP_ENABLED`
+defaults to: `no`
+"""
diff --git a/testgen/template/dbsetup/020_create_standard_functions_sprocs.sql b/testgen/template/dbsetup/020_create_standard_functions_sprocs.sql
index d2285833..013343f0 100644
--- a/testgen/template/dbsetup/020_create_standard_functions_sprocs.sql
+++ b/testgen/template/dbsetup/020_create_standard_functions_sprocs.sql
@@ -10,7 +10,7 @@ $$
WHEN UPPER(difftype) IN ('DAY', 'DD')
THEN DATE_PART('day', seconddate - firstdate)
WHEN UPPER(difftype) IN ('WEEK','WK')
- THEN TRUNC(DATE_PART('day', seconddate - firstdate)/7)
+ THEN (DATE_TRUNC('week', seconddate)::DATE - DATE_TRUNC('week', firstdate)::DATE) / 7
WHEN UPPER(difftype) IN ('MON', 'MM')
THEN 12 * (DATE_PART('year', seconddate) - DATE_PART('year', firstdate))
+ (DATE_PART('month', seconddate) - DATE_PART('month', firstdate))
diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql
index d55ba76a..cd05e290 100644
--- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql
+++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql
@@ -113,6 +113,8 @@ CREATE TABLE table_groups
profile_sample_min_count BIGINT DEFAULT 100000,
profiling_delay_days VARCHAR(3) DEFAULT '0',
profile_flag_cdes BOOLEAN DEFAULT TRUE,
+ profile_flag_pii BOOLEAN DEFAULT TRUE,
+ profile_exclude_xde BOOLEAN DEFAULT TRUE,
profile_do_pair_rules VARCHAR(3) DEFAULT 'N',
profile_pair_rule_pct INTEGER DEFAULT 95,
include_in_dashboard BOOLEAN DEFAULT TRUE,
@@ -236,10 +238,22 @@ CREATE TABLE test_definitions (
profiling_as_of_date TIMESTAMP,
last_manual_update TIMESTAMP DEFAULT NULL,
export_to_observability VARCHAR(5),
+ flagged BOOLEAN DEFAULT FALSE NOT NULL,
CONSTRAINT test_definitions_test_suites_test_suite_id_fk
FOREIGN KEY (test_suite_id) REFERENCES test_suites
);
+CREATE TABLE test_definition_notes (
+ id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
+ test_definition_id UUID NOT NULL REFERENCES test_definitions ON DELETE CASCADE,
+ detail TEXT NOT NULL,
+ created_by VARCHAR(100) NOT NULL,
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+ updated_at TIMESTAMP
+);
+
+CREATE INDEX ix_tdn_tdid ON test_definition_notes(test_definition_id, created_at DESC);
+
CREATE TABLE profile_results (
id UUID DEFAULT gen_random_uuid()
CONSTRAINT profile_results_id_pk
@@ -329,6 +343,7 @@ CREATE TABLE profile_anomaly_types (
anomaly_description VARCHAR(500),
anomaly_criteria VARCHAR(2000),
detail_expression VARCHAR(2000),
+ detail_redactable BOOLEAN DEFAULT FALSE,
issue_likelihood VARCHAR(50), -- Potential, Likely, Certain
suggested_action VARCHAR(1000),
dq_score_prevalence_formula TEXT,
@@ -435,6 +450,8 @@ CREATE TABLE data_column_chars (
functional_data_type VARCHAR(50),
description VARCHAR(1000),
critical_data_element BOOLEAN,
+ excluded_data_element BOOLEAN,
+ pii_flag VARCHAR(50),
data_source VARCHAR(40),
source_system VARCHAR(40),
source_process VARCHAR(40),
@@ -485,6 +502,7 @@ CREATE TABLE test_types (
default_parm_values TEXT,
default_parm_prompts TEXT,
default_parm_help TEXT,
+ default_parm_required TEXT,
default_severity VARCHAR(10),
run_type VARCHAR(10),
test_scope VARCHAR,
@@ -596,6 +614,7 @@ CREATE TABLE target_data_lookups (
sql_flavor VARCHAR(20) NOT NULL,
lookup_type VARCHAR(10),
lookup_query VARCHAR,
+ lookup_redactable_columns VARCHAR(100),
error_type VARCHAR(30) NOT NULL,
CONSTRAINT target_data_lookups_test_id_sql_flavor_error_type_pk
PRIMARY KEY (test_id, sql_flavor, error_type)
@@ -620,7 +639,7 @@ CREATE TABLE auth_users (
email VARCHAR(256),
name VARCHAR(256),
password VARCHAR(120),
- role VARCHAR(20),
+ is_global_admin BOOLEAN NOT NULL DEFAULT FALSE,
latest_login TIMESTAMP
);
@@ -628,6 +647,29 @@ ALTER TABLE auth_users
ADD CONSTRAINT unique_username
UNIQUE (username);
+CREATE TABLE project_memberships (
+ id UUID DEFAULT gen_random_uuid()
+ CONSTRAINT pk_project_memberships_id
+ PRIMARY KEY,
+ user_id UUID NOT NULL
+ CONSTRAINT fk_project_memberships_auth_users
+ REFERENCES auth_users(id)
+ ON DELETE CASCADE,
+ project_code VARCHAR(30) NOT NULL
+ CONSTRAINT fk_project_memberships_projects
+ REFERENCES projects(project_code)
+ ON DELETE CASCADE,
+ role VARCHAR(20) NOT NULL,
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+
+ CONSTRAINT uq_project_memberships_user_project
+ UNIQUE (user_id, project_code)
+);
+
+CREATE INDEX ix_pm_user_id ON project_memberships(user_id);
+CREATE INDEX ix_pm_project_code ON project_memberships(project_code);
+CREATE INDEX ix_pm_role ON project_memberships(role);
+
CREATE TABLE tg_revision (
component VARCHAR(50) NOT NULL
CONSTRAINT tg_revision_component_pk
@@ -746,20 +788,20 @@ CREATE INDEX ix_td_ts_tc
CREATE UNIQUE INDEX uix_td_autogen_schema
ON test_definitions (test_suite_id, test_type, schema_name)
- WHERE last_auto_gen_date IS NOT NULL
- AND table_name IS NULL
+ WHERE last_auto_gen_date IS NOT NULL
+ AND table_name IS NULL
AND column_name IS NULL;
CREATE UNIQUE INDEX uix_td_autogen_table
ON test_definitions (test_suite_id, test_type, schema_name, table_name)
- WHERE last_auto_gen_date IS NOT NULL
- AND table_name IS NOT NULL
+ WHERE last_auto_gen_date IS NOT NULL
+ AND table_name IS NOT NULL
AND column_name IS NULL;
CREATE UNIQUE INDEX uix_td_autogen_column
ON test_definitions (test_suite_id, test_type, schema_name, table_name, column_name)
- WHERE last_auto_gen_date IS NOT NULL
- AND table_name IS NOT NULL
+ WHERE last_auto_gen_date IS NOT NULL
+ AND table_name IS NOT NULL
AND column_name IS NOT NULL;
-- Index test_runs
@@ -795,7 +837,7 @@ CREATE INDEX ix_tr_ts_tctt
ON test_results(test_suite_id, table_name, column_names, test_type);
-- Index data_structure_log
-CREATE INDEX ix_dsl_tg_tcd
+CREATE INDEX ix_dsl_tg_tcd
ON data_structure_log (table_groups_id, table_name, change_date);
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
diff --git a/testgen/template/dbsetup/040_populate_new_schema_project.sql b/testgen/template/dbsetup/040_populate_new_schema_project.sql
index c6c959f5..36f6a30c 100644
--- a/testgen/template/dbsetup/040_populate_new_schema_project.sql
+++ b/testgen/template/dbsetup/040_populate_new_schema_project.sql
@@ -7,11 +7,22 @@ SELECT '{PROJECT_CODE}' as project_code,
'{OBSERVABILITY_API_KEY}' as observability_api_key,
'{OBSERVABILITY_API_URL}' as observability_api_url;
-INSERT INTO auth_users
- (username, email, name, password, role)
-SELECT
- '{UI_USER_USERNAME}' as username,
- '{UI_USER_EMAIL}' as email,
- '{UI_USER_NAME}' as name,
- '{UI_USER_ENCRYPTED_PASSWORD}' as password,
- 'admin' as role;
+
+WITH inserted_user AS (
+ INSERT INTO auth_users
+ (username, email, name, password, is_global_admin)
+ SELECT
+ '{UI_USER_USERNAME}' as username,
+ '{UI_USER_EMAIL}' as email,
+ '{UI_USER_NAME}' as name,
+ '{UI_USER_ENCRYPTED_PASSWORD}' as password,
+ true as is_global_admin
+ RETURNING id
+)
+INSERT INTO project_memberships
+ (user_id, project_code, role, created_at)
+SELECT id AS user_id,
+ '{PROJECT_CODE}' AS project_code,
+ 'admin' AS role,
+ NOW() AS created_at
+FROM inserted_user;
diff --git a/testgen/template/dbsetup/075_grant_role_rights.sql b/testgen/template/dbsetup/075_grant_role_rights.sql
index df1d6dea..af100289 100644
--- a/testgen/template/dbsetup/075_grant_role_rights.sql
+++ b/testgen/template/dbsetup/075_grant_role_rights.sql
@@ -41,7 +41,8 @@ GRANT SELECT, INSERT, DELETE, UPDATE ON
{SCHEMA_NAME}.score_history_latest_runs,
{SCHEMA_NAME}.job_schedules,
{SCHEMA_NAME}.settings,
- {SCHEMA_NAME}.notification_settings
+ {SCHEMA_NAME}.notification_settings,
+ {SCHEMA_NAME}.test_definition_notes
TO testgen_execute_role;
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Boolean_Value_Mismatch.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Boolean_Value_Mismatch.yaml
index fc3bd2e8..1f184a75 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Boolean_Value_Mismatch.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Boolean_Value_Mismatch.yaml
@@ -16,6 +16,7 @@ profile_anomaly_types:
detail_expression: |-
CASE WHEN p.top_freq_values IS NULL THEN 'Min: ' || p.min_text || ', Max: ' || p.max_text
ELSE 'Top Freq: ' || p.top_freq_values END
+ detail_redactable: true
issue_likelihood: Likely
suggested_action: "Review your source data and follow-up with data owners to determine\
\ whether this data needs to be corrected. "
@@ -83,3 +84,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1514'
+ test_id: '1015'
+ test_type: Boolean_Value_Mismatch
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1514'
+ test_id: '1015'
+ test_type: Boolean_Value_Mismatch
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Char_Column_Date_Values.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Char_Column_Date_Values.yaml
index d7690240..7e25517d 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Char_Column_Date_Values.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Char_Column_Date_Values.yaml
@@ -95,3 +95,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT A.* FROM (SELECT DISTINCT 'Date' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_DATE;"{COLUMN_NAME}"%> = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_2}) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT 'Non-Date' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_DATE;"{COLUMN_NAME}"%> != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_2}) AS B ORDER BY data_type, count DESC;
error_type: Profile Anomaly
+ - id: '1511'
+ test_id: '1012'
+ test_type: Char_Column_Date_Values
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT A.* FROM (SELECT DISTINCT 'Date' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_DATE;"{COLUMN_NAME}"%> = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC FETCH FIRST {LIMIT_2} ROWS ONLY) A UNION ALL SELECT B.* FROM (SELECT DISTINCT 'Non-Date' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_DATE;"{COLUMN_NAME}"%> != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC FETCH FIRST {LIMIT_2} ROWS ONLY) B ORDER BY data_type, count DESC
+ error_type: Profile Anomaly
+ - id: '1511'
+ test_id: '1012'
+ test_type: Char_Column_Date_Values
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT A.* FROM (SELECT DISTINCT 'Date' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_DATE;"{COLUMN_NAME}"%> = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_2}) A UNION ALL SELECT B.* FROM (SELECT DISTINCT 'Non-Date' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_DATE;"{COLUMN_NAME}"%> != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_2}) B ORDER BY data_type, count DESC
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Char_Column_Number_Values.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Char_Column_Number_Values.yaml
index 9c600bac..d5d5ce14 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Char_Column_Number_Values.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Char_Column_Number_Values.yaml
@@ -95,3 +95,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT A.* FROM (SELECT DISTINCT 'Numeric' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_NUM;"{COLUMN_NAME}"%> = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_2}) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT 'Non-Numeric' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_NUM;"{COLUMN_NAME}"%> != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_2}) AS B ORDER BY data_type, count DESC;
error_type: Profile Anomaly
+ - id: '1510'
+ test_id: '1011'
+ test_type: Char_Column_Number_Values
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT A.* FROM (SELECT DISTINCT 'Numeric' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_NUM;"{COLUMN_NAME}"%> = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC FETCH FIRST {LIMIT_2} ROWS ONLY) A UNION ALL SELECT B.* FROM (SELECT DISTINCT 'Non-Numeric' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_NUM;"{COLUMN_NAME}"%> != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC FETCH FIRST {LIMIT_2} ROWS ONLY) B ORDER BY data_type, count DESC
+ error_type: Profile Anomaly
+ - id: '1510'
+ test_id: '1011'
+ test_type: Char_Column_Number_Values
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT A.* FROM (SELECT DISTINCT 'Numeric' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_NUM;"{COLUMN_NAME}"%> = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_2}) A UNION ALL SELECT B.* FROM (SELECT DISTINCT 'Non-Numeric' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_NUM;"{COLUMN_NAME}"%> != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_2}) B ORDER BY data_type, count DESC
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Column_Pattern_Mismatch.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Column_Pattern_Mismatch.yaml
index 7bdd0df6..00e37271 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Column_Pattern_Mismatch.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Column_Pattern_Mismatch.yaml
@@ -124,3 +124,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT A.* FROM (SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT trim(split_part('{DETAIL_EXPRESSION}', '|', 4)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}"::VARCHAR, '[a-z]', 'a'), '[A-Z]', 'A'), '[0-9]', 'N') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_4}) A UNION ALL SELECT B.* FROM (SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT trim(split_part('{DETAIL_EXPRESSION}', '|', 6)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}"::VARCHAR, '[a-z]', 'a'), '[A-Z]', 'A'), '[0-9]', 'N') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_4}) B UNION ALL SELECT C.* FROM (SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT trim(split_part('{DETAIL_EXPRESSION}', '|', 8)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}"::VARCHAR, '[a-z]', 'a'), '[A-Z]', 'A'), '[0-9]', 'N') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_4}) C UNION ALL SELECT D.* FROM (SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT trim(split_part('{DETAIL_EXPRESSION}', '|', 10)) AS top_pattern) b WHERE REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE( "{COLUMN_NAME}"::VARCHAR, '[a-z]', 'a'), '[A-Z]', 'A'), '[0-9]', 'N') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_4}) D ORDER BY top_pattern DESC, count DESC;
error_type: Profile Anomaly
+ - id: '1506'
+ test_id: '1007'
+ test_type: Column_Pattern_Mismatch
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT A.* FROM (SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT TRIM(REGEXP_SUBSTR('{DETAIL_EXPRESSION}', '[^|]+', 1, 4)) AS top_pattern FROM DUAL) b WHERE REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE("{COLUMN_NAME}", '[a-z]', 'a'), '[A-Z]', 'A'), '[0-9]', 'N') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC FETCH FIRST {LIMIT_4} ROWS ONLY) A UNION ALL SELECT B.* FROM (SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT TRIM(REGEXP_SUBSTR('{DETAIL_EXPRESSION}', '[^|]+', 1, 6)) AS top_pattern FROM DUAL) b WHERE REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE("{COLUMN_NAME}", '[a-z]', 'a'), '[A-Z]', 'A'), '[0-9]', 'N') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC FETCH FIRST {LIMIT_4} ROWS ONLY) B UNION ALL SELECT C.* FROM (SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT TRIM(REGEXP_SUBSTR('{DETAIL_EXPRESSION}', '[^|]+', 1, 8)) AS top_pattern FROM DUAL) b WHERE REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE("{COLUMN_NAME}", '[a-z]', 'a'), '[A-Z]', 'A'), '[0-9]', 'N') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC FETCH FIRST {LIMIT_4} ROWS ONLY) C UNION ALL SELECT D.* FROM (SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT TRIM(REGEXP_SUBSTR('{DETAIL_EXPRESSION}', '[^|]+', 1, 10)) AS top_pattern FROM DUAL) b WHERE REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE("{COLUMN_NAME}", '[a-z]', 'a'), '[A-Z]', 'A'), '[0-9]', 'N') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC FETCH FIRST {LIMIT_4} ROWS ONLY) D ORDER BY top_pattern DESC, count DESC
+ error_type: Profile Anomaly
+ - id: '1506'
+ test_id: '1007'
+ test_type: Column_Pattern_Mismatch
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT A.* FROM (SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT TRIM(SUBSTR_REGEXPR('[^|]+' IN '{DETAIL_EXPRESSION}' OCCURRENCE 4)) AS top_pattern FROM DUMMY) b WHERE REPLACE_REGEXPR('[0-9]' IN REPLACE_REGEXPR('[A-Z]' IN REPLACE_REGEXPR('[a-z]' IN "{COLUMN_NAME}" WITH 'a') WITH 'A') WITH 'N') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_4}) A UNION ALL SELECT B.* FROM (SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT TRIM(SUBSTR_REGEXPR('[^|]+' IN '{DETAIL_EXPRESSION}' OCCURRENCE 6)) AS top_pattern FROM DUMMY) b WHERE REPLACE_REGEXPR('[0-9]' IN REPLACE_REGEXPR('[A-Z]' IN REPLACE_REGEXPR('[a-z]' IN "{COLUMN_NAME}" WITH 'a') WITH 'A') WITH 'N') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_4}) B UNION ALL SELECT C.* FROM (SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT TRIM(SUBSTR_REGEXPR('[^|]+' IN '{DETAIL_EXPRESSION}' OCCURRENCE 8)) AS top_pattern FROM DUMMY) b WHERE REPLACE_REGEXPR('[0-9]' IN REPLACE_REGEXPR('[A-Z]' IN REPLACE_REGEXPR('[a-z]' IN "{COLUMN_NAME}" WITH 'a') WITH 'A') WITH 'N') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_4}) C UNION ALL SELECT D.* FROM (SELECT DISTINCT b.top_pattern, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT TRIM(SUBSTR_REGEXPR('[^|]+' IN '{DETAIL_EXPRESSION}' OCCURRENCE 10)) AS top_pattern FROM DUMMY) b WHERE REPLACE_REGEXPR('[0-9]' IN REPLACE_REGEXPR('[A-Z]' IN REPLACE_REGEXPR('[a-z]' IN "{COLUMN_NAME}" WITH 'a') WITH 'A') WITH 'N') = b.top_pattern GROUP BY b.top_pattern, "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_4}) D ORDER BY top_pattern DESC, count DESC
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Delimited_Data_Embedded.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Delimited_Data_Embedded.yaml
index f2a2adec..caf0ea32 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Delimited_Data_Embedded.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Delimited_Data_Embedded.yaml
@@ -9,6 +9,7 @@ profile_anomaly_types:
p.std_pattern_match = 'DELIMITED_DATA'
detail_expression: |-
CASE WHEN p.top_freq_values IS NULL THEN 'Min: ' || p.min_text || ', Max: ' || p.max_text ELSE 'Top Freq: ' || p.top_freq_values END
+ detail_redactable: true
issue_likelihood: Likely
suggested_action: |-
Review your source data and follow-up with data consumers to determine the most useful representation of this data.
@@ -78,3 +79,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE REGEXP_LIKE("{COLUMN_NAME}"::VARCHAR, '^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$') AND NOT REGEXP_LIKE("{COLUMN_NAME}"::VARCHAR, '.*\\s(and|but|or|yet)\\s.*') GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1524'
+ test_id: '1025'
+ test_type: Delimited_Data_Embedded
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE REGEXP_LIKE("{COLUMN_NAME}", '^([^,|' || CHR(9) || ']{1,20}[,|' || CHR(9) || ']){2,}[^,|' || CHR(9) || ']{0,20}([,|' || CHR(9) || ']{0,1}[^,|' || CHR(9) || ']{0,20})*$') AND NOT REGEXP_LIKE("{COLUMN_NAME}", '[[:space:]](and|but|or|yet)[[:space:]]') GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1524'
+ test_id: '1025'
+ test_type: Delimited_Data_Embedded
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" LIKE_REGEXPR '^([^,|' || NCHAR(9) || ']{1,20}[,|' || NCHAR(9) || ']){2,}[^,|' || NCHAR(9) || ']{0,20}([,|' || NCHAR(9) || ']{0,1}[^,|' || NCHAR(9) || ']{0,20})*$' AND NOT "{COLUMN_NAME}" LIKE_REGEXPR '[[:space:]](and|but|or|yet)[[:space:]]' GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Inconsistent_Casing.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Inconsistent_Casing.yaml
index 6443d845..c6f5e139 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Inconsistent_Casing.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Inconsistent_Casing.yaml
@@ -61,11 +61,11 @@ profile_anomaly_types:
lookup_type: null
lookup_query: |-
SELECT TOP {LIMIT_2} 'Upper Case' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
- WHERE UPPER("{COLUMN_NAME}") = "{COLUMN_NAME}"
+ WHERE UPPER("{COLUMN_NAME}") = "{COLUMN_NAME}" COLLATE Latin1_General_BIN
GROUP BY "{COLUMN_NAME}"
UNION ALL
SELECT TOP {LIMIT_2} 'Mixed Case' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
- WHERE "{COLUMN_NAME}" <> UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" <> LOWER("{COLUMN_NAME}")
+ WHERE "{COLUMN_NAME}" COLLATE Latin1_General_BIN <> UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" COLLATE Latin1_General_BIN <> LOWER("{COLUMN_NAME}")
GROUP BY "{COLUMN_NAME}"
error_type: Profile Anomaly
- id: '1259'
@@ -124,3 +124,19 @@ profile_anomaly_types:
WHERE "{COLUMN_NAME}" <> UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" <> LOWER("{COLUMN_NAME}")
GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT_2})
error_type: Profile Anomaly
+ - id: '1526'
+ test_id: '1028'
+ test_type: Inconsistent_Casing
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT * FROM (SELECT 'Upper Case' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE UPPER("{COLUMN_NAME}") = "{COLUMN_NAME}" GROUP BY "{COLUMN_NAME}" FETCH FIRST {LIMIT_2} ROWS ONLY) UNION ALL SELECT * FROM (SELECT 'Mixed Case' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" <> UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" <> LOWER("{COLUMN_NAME}") GROUP BY "{COLUMN_NAME}" FETCH FIRST {LIMIT_2} ROWS ONLY)
+ error_type: Profile Anomaly
+ - id: '1526'
+ test_id: '1028'
+ test_type: Inconsistent_Casing
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT * FROM (SELECT 'Upper Case' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE UPPER("{COLUMN_NAME}") = "{COLUMN_NAME}" GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT_2}) UNION ALL SELECT * FROM (SELECT 'Mixed Case' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" <> UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" <> LOWER("{COLUMN_NAME}") GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT_2})
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Invalid_Zip3_USA.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Invalid_Zip3_USA.yaml
index 876661df..8f8215c0 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Invalid_Zip3_USA.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Invalid_Zip3_USA.yaml
@@ -81,3 +81,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE TRANSLATE("{COLUMN_NAME}",'012345678','999999999') <> '999' GROUP BY "{COLUMN_NAME}" ORDER BY count DESC, "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1523'
+ test_id: '1024'
+ test_type: Invalid_Zip3_USA
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE TRANSLATE("{COLUMN_NAME}",'012345678','999999999') <> '999' GROUP BY "{COLUMN_NAME}" ORDER BY count DESC, "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1523'
+ test_id: '1024'
+ test_type: Invalid_Zip3_USA
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE REPLACE_REGEXPR('[0-9]' IN "{COLUMN_NAME}" WITH '9') <> '999' GROUP BY "{COLUMN_NAME}" ORDER BY count DESC, "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Invalid_Zip_USA.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Invalid_Zip_USA.yaml
index 400424a9..a4aeaa62 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Invalid_Zip_USA.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Invalid_Zip_USA.yaml
@@ -77,3 +77,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE TRANSLATE("{COLUMN_NAME}",'012345678','999999999') NOT IN ('99999', '999999999', '99999-9999') GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1502'
+ test_id: '1003'
+ test_type: Invalid_Zip_USA
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE TRANSLATE("{COLUMN_NAME}",'012345678','999999999') NOT IN ('99999', '999999999', '99999-9999') GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1502'
+ test_id: '1003'
+ test_type: Invalid_Zip_USA
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE REPLACE_REGEXPR('[0-9]' IN "{COLUMN_NAME}" WITH '9') NOT IN ('99999', '999999999', '99999-9999') GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Leading_Spaces.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Leading_Spaces.yaml
index 4231f420..3f74cb98 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Leading_Spaces.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Leading_Spaces.yaml
@@ -77,3 +77,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE (CASE WHEN "{COLUMN_NAME}" BETWEEN ' !' AND '!' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1508'
+ test_id: '1009'
+ test_type: Leading_Spaces
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE (CASE WHEN "{COLUMN_NAME}" BETWEEN ' !' AND '!' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1508'
+ test_id: '1009'
+ test_type: Leading_Spaces
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE (CASE WHEN "{COLUMN_NAME}" BETWEEN ' !' AND '!' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Multiple_Types_Major.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Multiple_Types_Major.yaml
index 9f3e805e..ddb6b8d6 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Multiple_Types_Major.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Multiple_Types_Major.yaml
@@ -91,3 +91,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT column_name, columns.table_name, CASE WHEN data_type ILIKE 'timestamp%' THEN lower(data_type) WHEN data_type ILIKE 'date' THEN lower(data_type) WHEN data_type ILIKE 'boolean' THEN 'boolean' WHEN data_type = 'TEXT' THEN 'varchar(' || CAST(character_maximum_length AS VARCHAR) || ')' WHEN data_type ILIKE 'char%' THEN 'char(' || CAST(character_maximum_length AS VARCHAR) || ')' WHEN data_type = 'NUMBER' AND numeric_precision = 38 AND numeric_scale = 0 THEN 'bigint' WHEN data_type ILIKE 'num%' THEN 'numeric(' || CAST(numeric_precision AS VARCHAR) || ',' || CAST(numeric_scale AS VARCHAR) || ')' ELSE data_type END AS data_type FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = '{TARGET_SCHEMA}' AND columns.column_name = '{COLUMN_NAME}' AND tables.table_type = 'BASE TABLE' ORDER BY data_type, table_name LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1504'
+ test_id: '1005'
+ test_type: Multiple_Types_Major
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT column_name, table_name, CASE WHEN data_type LIKE 'TIMESTAMP%%' THEN LOWER(data_type) WHEN data_type = 'DATE' THEN 'date' WHEN data_type = 'VARCHAR2' THEN 'varchar2(' || data_length || ')' WHEN data_type = 'CHAR' THEN 'char(' || data_length || ')' WHEN data_type = 'NUMBER' AND data_precision IS NULL THEN 'number' WHEN data_type = 'NUMBER' AND data_scale = 0 THEN 'number(' || data_precision || ')' WHEN data_type = 'NUMBER' THEN 'number(' || data_precision || ',' || data_scale || ')' ELSE data_type END AS data_type FROM all_tab_columns WHERE owner = '{TARGET_SCHEMA}' AND column_name = '{COLUMN_NAME}' ORDER BY data_type, table_name FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1504'
+ test_id: '1005'
+ test_type: Multiple_Types_Major
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT COLUMN_NAME, TABLE_NAME, CASE WHEN DATA_TYPE_NAME LIKE 'TIMESTAMP%%' THEN LOWER(DATA_TYPE_NAME) WHEN DATA_TYPE_NAME = 'DATE' THEN 'date' WHEN DATA_TYPE_NAME IN ('NVARCHAR', 'VARCHAR') THEN LOWER(DATA_TYPE_NAME) || '(' || LENGTH || ')' WHEN DATA_TYPE_NAME = 'CHAR' THEN 'char(' || LENGTH || ')' WHEN DATA_TYPE_NAME = 'DECIMAL' AND SCALE = 0 THEN 'decimal(' || LENGTH || ')' WHEN DATA_TYPE_NAME = 'DECIMAL' THEN 'decimal(' || LENGTH || ',' || SCALE || ')' WHEN DATA_TYPE_NAME IN ('INTEGER', 'BIGINT', 'SMALLINT', 'TINYINT') THEN LOWER(DATA_TYPE_NAME) ELSE LOWER(DATA_TYPE_NAME) END AS data_type FROM SYS.TABLE_COLUMNS WHERE SCHEMA_NAME = '{TARGET_SCHEMA}' AND COLUMN_NAME = '{COLUMN_NAME}' ORDER BY data_type, TABLE_NAME LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Multiple_Types_Minor.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Multiple_Types_Minor.yaml
index 1ddee506..17df28f3 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Multiple_Types_Minor.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Multiple_Types_Minor.yaml
@@ -91,3 +91,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT column_name, columns.table_name, CASE WHEN data_type ILIKE 'timestamp%' THEN lower(data_type) WHEN data_type ILIKE 'date' THEN lower(data_type) WHEN data_type ILIKE 'boolean' THEN 'boolean' WHEN data_type = 'TEXT' THEN 'varchar(' || CAST(character_maximum_length AS VARCHAR) || ')' WHEN data_type ILIKE 'char%' THEN 'char(' || CAST(character_maximum_length AS VARCHAR) || ')' WHEN data_type = 'NUMBER' AND numeric_precision = 38 AND numeric_scale = 0 THEN 'bigint' WHEN data_type ILIKE 'num%' THEN 'numeric(' || CAST(numeric_precision AS VARCHAR) || ',' || CAST(numeric_scale AS VARCHAR) || ')' ELSE data_type END AS data_type FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = '{TARGET_SCHEMA}' AND columns.column_name = '{COLUMN_NAME}' AND tables.table_type = 'BASE TABLE' ORDER BY data_type, table_name LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1503'
+ test_id: '1004'
+ test_type: Multiple_Types_Minor
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT column_name, table_name, CASE WHEN data_type LIKE 'TIMESTAMP%%' THEN LOWER(data_type) WHEN data_type = 'DATE' THEN 'date' WHEN data_type = 'VARCHAR2' THEN 'varchar2(' || data_length || ')' WHEN data_type = 'CHAR' THEN 'char(' || data_length || ')' WHEN data_type = 'NUMBER' AND data_precision IS NULL THEN 'number' WHEN data_type = 'NUMBER' AND data_scale = 0 THEN 'number(' || data_precision || ')' WHEN data_type = 'NUMBER' THEN 'number(' || data_precision || ',' || data_scale || ')' ELSE data_type END AS data_type FROM all_tab_columns WHERE owner = '{TARGET_SCHEMA}' AND column_name = '{COLUMN_NAME}' ORDER BY data_type, table_name FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1503'
+ test_id: '1004'
+ test_type: Multiple_Types_Minor
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT COLUMN_NAME, TABLE_NAME, CASE WHEN DATA_TYPE_NAME LIKE 'TIMESTAMP%%' THEN LOWER(DATA_TYPE_NAME) WHEN DATA_TYPE_NAME = 'DATE' THEN 'date' WHEN DATA_TYPE_NAME IN ('NVARCHAR', 'VARCHAR') THEN LOWER(DATA_TYPE_NAME) || '(' || LENGTH || ')' WHEN DATA_TYPE_NAME = 'CHAR' THEN 'char(' || LENGTH || ')' WHEN DATA_TYPE_NAME = 'DECIMAL' AND SCALE = 0 THEN 'decimal(' || LENGTH || ')' WHEN DATA_TYPE_NAME = 'DECIMAL' THEN 'decimal(' || LENGTH || ',' || SCALE || ')' WHEN DATA_TYPE_NAME IN ('INTEGER', 'BIGINT', 'SMALLINT', 'TINYINT') THEN LOWER(DATA_TYPE_NAME) ELSE LOWER(DATA_TYPE_NAME) END AS data_type FROM SYS.TABLE_COLUMNS WHERE SCHEMA_NAME = '{TARGET_SCHEMA}' AND COLUMN_NAME = '{COLUMN_NAME}' ORDER BY data_type, TABLE_NAME LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_No_Values.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_No_Values.yaml
index 87d80e61..0580df8c 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_No_Values.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_No_Values.yaml
@@ -79,3 +79,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1505'
+ test_id: '1006'
+ test_type: No_Values
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1505'
+ test_id: '1006'
+ test_type: No_Values
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Alpha_Name_Address.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Alpha_Name_Address.yaml
index 3cfd99ef..47297f76 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Alpha_Name_Address.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Alpha_Name_Address.yaml
@@ -91,3 +91,19 @@ profile_anomaly_types:
WHERE "{COLUMN_NAME}" = UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" = LOWER("{COLUMN_NAME}") AND "{COLUMN_NAME}" > ''
GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT}
error_type: Profile Anomaly
+ - id: '1527'
+ test_id: '1029'
+ test_type: Non_Alpha_Name_Address
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" = UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" = LOWER("{COLUMN_NAME}") AND "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1527'
+ test_id: '1029'
+ test_type: Non_Alpha_Name_Address
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" = UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" = LOWER("{COLUMN_NAME}") AND "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Alpha_Prefixed_Name.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Alpha_Prefixed_Name.yaml
index dbaa2631..1ad2aeb0 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Alpha_Prefixed_Name.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Alpha_Prefixed_Name.yaml
@@ -9,6 +9,7 @@ profile_anomaly_types:
min_text < 'A' AND LEFT(min_text, 1) NOT IN ('"', ' ') AND RIGHT(min_text, 1) <> '''' AND functional_data_type IN ('City', 'Person Given Name', 'Person Last Name', 'Person Full Name')
detail_expression: |-
'Minimum Value: ' || min_text
+ detail_redactable: true
issue_likelihood: Definite
suggested_action: |-
Values starting with a non-alphabetic character are highly likely to be invalid for this kind of column. This may indicate a file format change, error in an ingestion process, or incorrect source data. It could also indicate flagging or coding of some kind that can be broken out in a separate column in processed data. Review your pipeline process and source data to determine the root-cause. If this data accurately reflects source data, and upstream corrections are not possible, consider applying corrections directly to processed data where possible.
@@ -92,3 +93,19 @@ profile_anomaly_types:
WHERE "{COLUMN_NAME}" < 'A' AND LEFT("{COLUMN_NAME}", 1) NOT IN ('"', ' ') AND RIGHT("{COLUMN_NAME}", 1) <> ''''
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT}
error_type: Profile Anomaly
+ - id: '1528'
+ test_id: '1030'
+ test_type: Non_Alpha_Prefixed_Name
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" < 'A' AND SUBSTR("{COLUMN_NAME}", 1, 1) NOT IN ('"', ' ') AND SUBSTR("{COLUMN_NAME}", -1, 1) <> '''' GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1528'
+ test_id: '1030'
+ test_type: Non_Alpha_Prefixed_Name
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" < 'A' AND SUBSTR("{COLUMN_NAME}", 1, 1) NOT IN ('"', ' ') AND SUBSTR("{COLUMN_NAME}", -1, 1) <> '''' GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Printing_Chars.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Printing_Chars.yaml
index a6118bed..3c2783fb 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Printing_Chars.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Printing_Chars.yaml
@@ -33,7 +33,7 @@ profile_anomaly_types:
'\u200f', '\x8207'),
'\u202f', '\x8239'),
'\u3000', '\x12288'),
- '\ufeff', '\x65279') as `{COLUMN_NAME}_content`,
+ '\ufeff', '\x65279') as `{COLUMN_NAME}`,
COUNT(*) as record_ct FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`
WHERE TRANSLATE(`{COLUMN_NAME}`, '\u00a0\u2009\u200b\u200c\u200d\u200e\u200f\u202f\u3000\ufeff', 'XXXXXXXXXX') <> `{COLUMN_NAME}`
GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` LIMIT {LIMIT}
@@ -54,7 +54,7 @@ profile_anomaly_types:
NCHAR(8207), '\x8207'),
NCHAR(8239), '\x8239'),
NCHAR(12288), '\x12288'),
- NCHAR(65279), '\x65279') AS "{COLUMN_NAME}_content",
+ NCHAR(65279), '\x65279') AS "{COLUMN_NAME}",
COUNT(*) AS record_ct
FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE TRANSLATE("{COLUMN_NAME}", NCHAR(160) + NCHAR(8201) + NCHAR(8203) + NCHAR(8204) + NCHAR(8205) + NCHAR(8206) + NCHAR(8207) + NCHAR(8239) + NCHAR(12288) + NCHAR(65279), 'XXXXXXXXXX') <> "{COLUMN_NAME}"
@@ -76,7 +76,7 @@ profile_anomaly_types:
CHR(8207), '\x8207'),
CHR(8239), '\x8239'),
CHR(12288), '\x12288'),
- CHR(65279), '\x65279') as "{COLUMN_NAME}_content",
+ CHR(65279), '\x65279') as "{COLUMN_NAME}",
COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE TRANSLATE("{COLUMN_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), 'XXXXXXXXXX') <> "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT}
@@ -97,7 +97,7 @@ profile_anomaly_types:
CHR(8207), '\x8207'),
CHR(8239), '\x8239'),
CHR(12288), '\x12288'),
- CHR(65279), '\x65279') as "{COLUMN_NAME}_content",
+ CHR(65279), '\x65279') as "{COLUMN_NAME}",
COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE TRANSLATE("{COLUMN_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), 'XXXXXXXXXX') <> "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT}
@@ -118,7 +118,7 @@ profile_anomaly_types:
CHR(8207), '\x8207'),
CHR(8239), '\x8239'),
CHR(12288), '\x12288'),
- CHR(65279), '\x65279') as "{COLUMN_NAME}_content",
+ CHR(65279), '\x65279') as "{COLUMN_NAME}",
COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE TRANSLATE("{COLUMN_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), 'XXXXXXXXXX') <> "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT}
@@ -139,8 +139,24 @@ profile_anomaly_types:
CHR(8207), '\x8207'),
CHR(8239), '\x8239'),
CHR(12288), '\x12288'),
- CHR(65279), '\x65279') as "{COLUMN_NAME}_content",
+ CHR(65279), '\x65279') as "{COLUMN_NAME}",
COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE TRANSLATE("{COLUMN_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), 'XXXXXXXXXX') <> "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT}
error_type: Profile Anomaly
+ - id: '1529'
+ test_id: '1031'
+ test_type: Non_Printing_Chars
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", UNISTR('\00A0'), '\x160'), UNISTR('\2009'), '\x8201'), UNISTR('\200B'), '\x8203'), UNISTR('\200C'), '\x8204'), UNISTR('\200D'), '\x8205'), UNISTR('\200E'), '\x8206'), UNISTR('\200F'), '\x8207'), UNISTR('\202F'), '\x8239'), UNISTR('\3000'), '\x12288'), UNISTR('\FEFF'), '\x65279') as "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE TRANSLATE("{COLUMN_NAME}", 'X' || UNISTR('\00A0') || UNISTR('\2009') || UNISTR('\200B') || UNISTR('\200C') || UNISTR('\200D') || UNISTR('\200E') || UNISTR('\200F') || UNISTR('\202F') || UNISTR('\3000') || UNISTR('\FEFF'), 'XXXXXXXXXXX') <> "{COLUMN_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1529'
+ test_id: '1031'
+ test_type: Non_Printing_Chars
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", NCHAR(160), '\x160'), NCHAR(8201), '\x8201'), NCHAR(8203), '\x8203'), NCHAR(8204), '\x8204'), NCHAR(8205), '\x8205'), NCHAR(8206), '\x8206'), NCHAR(8207), '\x8207'), NCHAR(8239), '\x8239'), NCHAR(12288), '\x12288'), NCHAR(65279), '\x65279') as "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", NCHAR(160), ''), NCHAR(8201), ''), NCHAR(8203), ''), NCHAR(8204), ''), NCHAR(8205), ''), NCHAR(8206), ''), NCHAR(8207), ''), NCHAR(8239), ''), NCHAR(12288), ''), NCHAR(65279), '') <> "{COLUMN_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Standard_Blanks.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Standard_Blanks.yaml
index 839c9fc8..b68be96d 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Standard_Blanks.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Non_Standard_Blanks.yaml
@@ -90,3 +90,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CASE WHEN "{COLUMN_NAME}" IN ('.', '?', ' ') THEN 1 WHEN LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP '-{2,}' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP '0{2,}' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP '9{2,}' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP 'x{2,}' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP 'z{2,}' THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('blank','error','missing','tbd', 'n/a','#na','none','null','unknown') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('(blank)','(error)','(missing)','(tbd)', '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('[blank]','[error]','[missing]','[tbd]', '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1 WHEN "{COLUMN_NAME}" = '' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1501'
+ test_id: '1002'
+ test_type: Non_Standard_Blanks
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CASE WHEN "{COLUMN_NAME}" IN ('.', '?', ' ') THEN 1 WHEN REGEXP_LIKE(LOWER("{COLUMN_NAME}"), '(-{2,}|0{2,}|9{2,}|x{2,}|z{2,})') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('blank','error','missing','tbd', 'n/a','#na','none','null','unknown') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('(blank)','(error)','(missing)','(tbd)', '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('[blank]','[error]','[missing]','[tbd]', '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1501'
+ test_id: '1002'
+ test_type: Non_Standard_Blanks
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CASE WHEN "{COLUMN_NAME}" IN ('.', '?', ' ') THEN 1 WHEN LOWER("{COLUMN_NAME}") LIKE_REGEXPR '(-{2,}|0{2,}|9{2,}|x{2,}|z{2,})' THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('blank','error','missing','tbd', 'n/a','#na','none','null','unknown') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('(blank)','(error)','(missing)','(tbd)', '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('[blank]','[error]','[missing]','[tbd]', '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Potential_Duplicates.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Potential_Duplicates.yaml
index 005957b5..b135f21a 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Potential_Duplicates.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Potential_Duplicates.yaml
@@ -79,3 +79,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" HAVING COUNT(*)> 1 ORDER BY COUNT(*) DESC LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1515'
+ test_id: '1016'
+ test_type: Potential_Duplicates
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" HAVING COUNT(*) > 1 ORDER BY COUNT(*) DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1515'
+ test_id: '1016'
+ test_type: Potential_Duplicates
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" HAVING COUNT(*) > 1 ORDER BY COUNT(*) DESC LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Potential_PII.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Potential_PII.yaml
index 7efb6ed9..e5742a68 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Potential_PII.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Potential_PII.yaml
@@ -77,3 +77,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1530'
+ test_id: '1100'
+ test_type: Potential_PII
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1530'
+ test_id: '1100'
+ test_type: Potential_PII
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Quoted_Values.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Quoted_Values.yaml
index 74a91f06..7c91fc79 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Quoted_Values.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Quoted_Values.yaml
@@ -78,3 +78,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE (CASE WHEN "{COLUMN_NAME}" ILIKE '"%"' OR "{COLUMN_NAME}" ILIKE '''%''' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1509'
+ test_id: '1010'
+ test_type: Quoted_Values
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE (CASE WHEN "{COLUMN_NAME}" LIKE '"%%"' OR "{COLUMN_NAME}" LIKE '''%%''' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1509'
+ test_id: '1010'
+ test_type: Quoted_Values
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE (CASE WHEN "{COLUMN_NAME}" LIKE '"%%"' OR "{COLUMN_NAME}" LIKE '''%%''' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Recency_One_Year.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Recency_One_Year.yaml
index f6b3b36f..53a16368 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Recency_One_Year.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Recency_One_Year.yaml
@@ -9,6 +9,7 @@ profile_anomaly_types:
MAX(p.max_date) < CURRENT_DATE - INTERVAL '1 year'
detail_expression: |-
'Most Recent Date: ' || MAX(p.max_date)::VARCHAR
+ detail_redactable: true
issue_likelihood: Possible
suggested_action: |-
Review your source data and follow-up with data owners to determine whether dates in table should be more recent.
@@ -72,3 +73,19 @@ profile_anomaly_types:
lookup_query: |-
created_in_ui
error_type: Profile Anomaly
+ - id: '1518'
+ test_id: '1019'
+ test_type: Recency_One_Year
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ created_in_ui
+ error_type: Profile Anomaly
+ - id: '1518'
+ test_id: '1019'
+ test_type: Recency_One_Year
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ created_in_ui
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Recency_Six_Months.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Recency_Six_Months.yaml
index 7f13ef99..00467a7d 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Recency_Six_Months.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Recency_Six_Months.yaml
@@ -9,6 +9,7 @@ profile_anomaly_types:
MAX(p.max_date) >= CURRENT_DATE - INTERVAL '1 year' AND MAX(p.max_date) < CURRENT_DATE - INTERVAL '6 months'
detail_expression: |-
'Most Recent Date: ' || MAX(p.max_date)::VARCHAR
+ detail_redactable: true
issue_likelihood: Possible
suggested_action: |-
Review your source data and follow-up with data owners to determine whether dates in table should be more recent.
@@ -72,3 +73,19 @@ profile_anomaly_types:
lookup_query: |-
created_in_ui
error_type: Profile Anomaly
+ - id: '1519'
+ test_id: '1020'
+ test_type: Recency_Six_Months
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ created_in_ui
+ error_type: Profile Anomaly
+ - id: '1519'
+ test_id: '1020'
+ test_type: Recency_Six_Months
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ created_in_ui
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Small_Divergent_Value_Ct.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Small_Divergent_Value_Ct.yaml
index bd121c7c..39841b8e 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Small_Divergent_Value_Ct.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Small_Divergent_Value_Ct.yaml
@@ -70,3 +70,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1513'
+ test_id: '1014'
+ test_type: Small Divergent Value Ct
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1513'
+ test_id: '1014'
+ test_type: Small Divergent Value Ct
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Small_Missing_Value_Ct.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Small_Missing_Value_Ct.yaml
index 381c26c1..5a0d5ac8 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Small_Missing_Value_Ct.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Small_Missing_Value_Ct.yaml
@@ -73,3 +73,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE (CASE WHEN "{COLUMN_NAME}" IN ('.', '?', ' ') THEN 1 WHEN LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP '-{2,}' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP '0{2,}' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP '9{2,}' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP 'x{2,}' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP 'z{2,}' THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('blank','error','missing','tbd', 'n/a','#na','none','null','unknown') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('(blank)','(error)','(missing)','(tbd)', '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('[blank]','[error]','[missing]','[tbd]', '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1 WHEN "{COLUMN_NAME}" = '' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1512'
+ test_id: '1013'
+ test_type: Small Missing Value Ct
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE (CASE WHEN "{COLUMN_NAME}" IN ('.', '?', ' ') THEN 1 WHEN REGEXP_LIKE(LOWER("{COLUMN_NAME}"), '(-{2,}|0{2,}|9{2,}|x{2,}|z{2,})') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('blank','error','missing','tbd', 'n/a','#na','none','null','unknown') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('(blank)','(error)','(missing)','(tbd)', '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('[blank]','[error]','[missing]','[tbd]', '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1512'
+ test_id: '1013'
+ test_type: Small Missing Value Ct
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE (CASE WHEN "{COLUMN_NAME}" IN ('.', '?', ' ') THEN 1 WHEN LOWER("{COLUMN_NAME}") LIKE_REGEXPR '(-{2,}|0{2,}|9{2,}|x{2,}|z{2,})' THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('blank','error','missing','tbd', 'n/a','#na','none','null','unknown') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('(blank)','(error)','(missing)','(tbd)', '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN ('[blank]','[error]','[missing]','[tbd]', '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Small_Numeric_Value_Ct.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Small_Numeric_Value_Ct.yaml
index 3b7f394e..b205e34d 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Small_Numeric_Value_Ct.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Small_Numeric_Value_Ct.yaml
@@ -92,3 +92,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT A.* FROM (SELECT DISTINCT 'Numeric' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_NUM;"{COLUMN_NAME}"%> = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_2}) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT 'Non-Numeric' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_NUM;"{COLUMN_NAME}"%> != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_2}) AS B ORDER BY data_type, count DESC;
error_type: Profile Anomaly
+ - id: '1522'
+ test_id: '1023'
+ test_type: Small_Numeric_Value_Ct
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT A.* FROM (SELECT DISTINCT 'Numeric' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_NUM;"{COLUMN_NAME}"%> = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC FETCH FIRST {LIMIT_2} ROWS ONLY) A UNION ALL SELECT B.* FROM (SELECT DISTINCT 'Non-Numeric' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_NUM;"{COLUMN_NAME}"%> != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC FETCH FIRST {LIMIT_2} ROWS ONLY) B ORDER BY data_type, count DESC
+ error_type: Profile Anomaly
+ - id: '1522'
+ test_id: '1023'
+ test_type: Small_Numeric_Value_Ct
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT A.* FROM (SELECT DISTINCT 'Numeric' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_NUM;"{COLUMN_NAME}"%> = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_2}) A UNION ALL SELECT B.* FROM (SELECT DISTINCT 'Non-Numeric' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE <%IS_NUM;"{COLUMN_NAME}"%> != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT {LIMIT_2}) B ORDER BY data_type, count DESC
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Standardized_Value_Matches.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Standardized_Value_Matches.yaml
index 4f7b457b..a76ec345 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Standardized_Value_Matches.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Standardized_Value_Matches.yaml
@@ -31,12 +31,12 @@ profile_anomaly_types:
GROUP BY possible_standard_value
HAVING COUNT(DISTINCT `{COLUMN_NAME}`) > 1
)
- SELECT DISTINCT a.`{COLUMN_NAME}`, b.possible_standard_value, COUNT(*) AS count
+ SELECT DISTINCT a.`{COLUMN_NAME}`, COUNT(*) AS count
FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` a
JOIN cte b
ON UPPER(REGEXP_REPLACE(CAST(a.`{COLUMN_NAME}` AS STRING), r"[ '\.\-\,]", '')) = b.possible_standard_value
- GROUP BY a.`{COLUMN_NAME}`, b.possible_standard_value
- ORDER BY b.possible_standard_value ASC, count DESC
+ GROUP BY a.`{COLUMN_NAME}`
+ ORDER BY possible_standard_value ASC, count DESC
LIMIT {LIMIT};
error_type: Profile Anomaly
- id: '1289'
@@ -45,7 +45,7 @@ profile_anomaly_types:
sql_flavor: databricks
lookup_type: null
lookup_query: |-
- WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE(`{COLUMN_NAME}`, ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT `{COLUMN_NAME}`) FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` GROUP BY UPPER(TRANSLATE(`{COLUMN_NAME}`, ' '',.-', '')) HAVING COUNT(DISTINCT `{COLUMN_NAME}`) > 1 ) SELECT DISTINCT a.`{COLUMN_NAME}`, possible_standard_value, COUNT(*) AS count FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` a, cte b WHERE UPPER(TRANSLATE(a.`{COLUMN_NAME}`, ' '',.-', '')) = b.possible_standard_value GROUP BY a.`{COLUMN_NAME}`, possible_standard_value ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
+ WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE(`{COLUMN_NAME}`, ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT `{COLUMN_NAME}`) FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` GROUP BY UPPER(TRANSLATE(`{COLUMN_NAME}`, ' '',.-', '')) HAVING COUNT(DISTINCT `{COLUMN_NAME}`) > 1 ) SELECT DISTINCT a.`{COLUMN_NAME}`, COUNT(*) AS count FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` a, cte b WHERE UPPER(TRANSLATE(a.`{COLUMN_NAME}`, ' '',.-', '')) = b.possible_standard_value GROUP BY a.`{COLUMN_NAME}` ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
error_type: Profile Anomaly
- id: '1131'
test_id: '1017'
@@ -53,7 +53,7 @@ profile_anomaly_types:
sql_flavor: mssql
lookup_type: null
lookup_query: |-
- WITH CTE AS ( SELECT DISTINCT TOP {LIMIT} UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") as distinct_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", possible_standard_value, COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}", possible_standard_value ORDER BY possible_standard_value ASC, count DESC;
+ WITH CTE AS ( SELECT DISTINCT TOP {LIMIT} UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") as distinct_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC;
error_type: Profile Anomaly
- id: '1074'
test_id: '1017'
@@ -61,7 +61,7 @@ profile_anomaly_types:
sql_flavor: postgresql
lookup_type: null
lookup_query: |-
- WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", possible_standard_value, COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}", possible_standard_value ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
+ WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
error_type: Profile Anomaly
- id: '1049'
test_id: '1017'
@@ -69,7 +69,7 @@ profile_anomaly_types:
sql_flavor: redshift
lookup_type: null
lookup_query: |-
- WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", possible_standard_value, COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}", possible_standard_value ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
+ WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
error_type: Profile Anomaly
- id: '1449'
test_id: '1017'
@@ -77,7 +77,7 @@ profile_anomaly_types:
sql_flavor: redshift_spectrum
lookup_type: null
lookup_query: |-
- WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", possible_standard_value, COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}", possible_standard_value ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
+ WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
error_type: Profile Anomaly
- id: '1188'
test_id: '1017'
@@ -85,5 +85,21 @@ profile_anomaly_types:
sql_flavor: snowflake
lookup_type: null
lookup_query: |-
- WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", possible_standard_value, COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}", possible_standard_value ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
+ WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
+ error_type: Profile Anomaly
+ - id: '1516'
+ test_id: '1017'
+ test_type: Standardized_Value_Matches
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", 'X '',.-', 'X')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") AS cnt FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", 'X '',.-', 'X')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", 'X '',.-', 'X')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1516'
+ test_id: '1017'
+ test_type: Standardized_Value_Matches
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ WITH CTE AS ( SELECT DISTINCT UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") AS cnt FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(a."{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT}
error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Suggested_Type.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Suggested_Type.yaml
index 0016e44d..551391eb 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Suggested_Type.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Suggested_Type.yaml
@@ -78,3 +78,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1500'
+ test_id: '1001'
+ test_type: Suggested_Type
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1500'
+ test_id: '1001'
+ test_type: Suggested_Type
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Table_Pattern_Mismatch.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Table_Pattern_Mismatch.yaml
index 8771cd40..0a917305 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Table_Pattern_Mismatch.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Table_Pattern_Mismatch.yaml
@@ -88,3 +88,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT column_name, columns.table_name FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = '{TARGET_SCHEMA}' AND columns.column_name = '{COLUMN_NAME}' AND UPPER(tables.table_type) = 'BASE TABLE' ORDER BY table_name LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1507'
+ test_id: '1008'
+ test_type: Table_Pattern_Mismatch
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT column_name, table_name FROM all_tab_columns WHERE owner = '{TARGET_SCHEMA}' AND column_name = '{COLUMN_NAME}' ORDER BY table_name FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1507'
+ test_id: '1008'
+ test_type: Table_Pattern_Mismatch
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT COLUMN_NAME, TABLE_NAME FROM SYS.TABLE_COLUMNS WHERE SCHEMA_NAME = '{TARGET_SCHEMA}' AND COLUMN_NAME = '{COLUMN_NAME}' ORDER BY TABLE_NAME LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Unexpected_Emails.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Unexpected_Emails.yaml
index 1c5bbf16..ced5139f 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Unexpected_Emails.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Unexpected_Emails.yaml
@@ -10,6 +10,7 @@ profile_anomaly_types:
AND NOT (p.column_name ILIKE '%email%' OR p.column_name ILIKE '%addr%')
detail_expression: |-
'Value Range: ' || p.min_text || ' thru ' || max_text
+ detail_redactable: true
issue_likelihood: Possible
suggested_action: |-
Review your source data and follow-up with data owners to determine whether column should be populated with email addresses.
@@ -77,3 +78,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1521'
+ test_id: '1022'
+ test_type: Unexpected Emails
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1521'
+ test_id: '1022'
+ test_type: Unexpected Emails
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Unexpected_US_States.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Unexpected_US_States.yaml
index 68e6e2e1..b98e4d61 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Unexpected_US_States.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Unexpected_US_States.yaml
@@ -12,6 +12,7 @@ profile_anomaly_types:
detail_expression: "'Value Range: ' || p.min_text || ' thru ' || max_text || CASE\
\ WHEN p.top_freq_values > '' THEN ', Top Freq Values: ' || REPLACE(p.top_freq_values,\
\ CHR(10), ' ; ') ELSE '' END "
+ detail_redactable: true
issue_likelihood: Possible
suggested_action: |-
Review your source data and follow-up with data owners to determine whether column should be populated with US states.
@@ -79,3 +80,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1520'
+ test_id: '1021'
+ test_type: Unexpected US States
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1520'
+ test_id: '1021'
+ test_type: Unexpected US States
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Unlikely_Date_Values.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Unlikely_Date_Values.yaml
index ea033f96..84d3bc5b 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Unlikely_Date_Values.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Unlikely_Date_Values.yaml
@@ -11,6 +11,7 @@ profile_anomaly_types:
OR p.max_date > CURRENT_DATE + INTERVAL '30 year')
detail_expression: |-
'Date Range: ' || p.min_date::VARCHAR || ' thru ' || p.max_date::VARCHAR
+ detail_redactable: true
issue_likelihood: Likely
suggested_action: |-
Review your source data and follow-up with data owners to determine whether this data needs to be corrected or removed.
@@ -81,3 +82,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", '{PROFILE_RUN_DATE}' :: DATE AS profile_run_date, COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a WHERE ("{COLUMN_NAME}" < '1900-01-01'::DATE) OR ("{COLUMN_NAME}" > '{PROFILE_RUN_DATE}' :: DATE + INTERVAL '30 year' ) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1517'
+ test_id: '1018'
+ test_type: Unlikely_Date_Values
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", TO_DATE('{PROFILE_RUN_DATE}', 'YYYY-MM-DD') AS profile_run_date, COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a WHERE ("{COLUMN_NAME}" < TO_DATE('1900-01-01', 'YYYY-MM-DD')) OR ("{COLUMN_NAME}" > ADD_MONTHS(TO_DATE('{PROFILE_RUN_DATE}', 'YYYY-MM-DD'), 360)) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1517'
+ test_id: '1018'
+ test_type: Unlikely_Date_Values
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", TO_DATE('{PROFILE_RUN_DATE}', 'YYYY-MM-DD') AS profile_run_date, COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a WHERE ("{COLUMN_NAME}" < TO_DATE('1900-01-01', 'YYYY-MM-DD')) OR ("{COLUMN_NAME}" > ADD_MONTHS(TO_DATE('{PROFILE_RUN_DATE}', 'YYYY-MM-DD'), 360)) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Variant_Coded_Values.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Variant_Coded_Values.yaml
index 7ba71123..a5b8519f 100644
--- a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Variant_Coded_Values.yaml
+++ b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Variant_Coded_Values.yaml
@@ -81,3 +81,19 @@ profile_anomaly_types:
lookup_query: |-
SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE lower("{COLUMN_NAME}") IN (SELECT trim(value) FROM TABLE (FLATTEN(INPUT => SPLIT(SUBSTRING('{DETAIL_EXPRESSION}', POSITION(':', '{DETAIL_EXPRESSION}') + 2), '|'))) ) GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Profile Anomaly
+ - id: '1525'
+ test_id: '1027'
+ test_type: Variant_Coded_Values
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE LOWER("{COLUMN_NAME}") IN (SELECT TRIM(REGEXP_SUBSTR(SUBSTR('{DETAIL_EXPRESSION}', INSTR('{DETAIL_EXPRESSION}', ':') + 2), '[^|]+', 1, LEVEL)) FROM DUAL CONNECT BY LEVEL <= REGEXP_COUNT(SUBSTR('{DETAIL_EXPRESSION}', INSTR('{DETAIL_EXPRESSION}', ':') + 2), '[^|]+')) GROUP BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Profile Anomaly
+ - id: '1525'
+ test_id: '1027'
+ test_type: Variant_Coded_Values
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ WITH val_list(token, remaining) AS ( SELECT CASE WHEN LOCATE(SUBSTR('{DETAIL_EXPRESSION}', LOCATE('{DETAIL_EXPRESSION}', ':') + 2), '|') > 0 THEN TRIM(SUBSTR(SUBSTR('{DETAIL_EXPRESSION}', LOCATE('{DETAIL_EXPRESSION}', ':') + 2), 1, LOCATE(SUBSTR('{DETAIL_EXPRESSION}', LOCATE('{DETAIL_EXPRESSION}', ':') + 2), '|') - 1)) ELSE TRIM(SUBSTR('{DETAIL_EXPRESSION}', LOCATE('{DETAIL_EXPRESSION}', ':') + 2)) END AS token, CASE WHEN LOCATE(SUBSTR('{DETAIL_EXPRESSION}', LOCATE('{DETAIL_EXPRESSION}', ':') + 2), '|') > 0 THEN SUBSTR(SUBSTR('{DETAIL_EXPRESSION}', LOCATE('{DETAIL_EXPRESSION}', ':') + 2), LOCATE(SUBSTR('{DETAIL_EXPRESSION}', LOCATE('{DETAIL_EXPRESSION}', ':') + 2), '|') + 1) ELSE '' END AS remaining FROM DUMMY UNION ALL SELECT CASE WHEN LOCATE(remaining, '|') > 0 THEN TRIM(SUBSTR(remaining, 1, LOCATE(remaining, '|') - 1)) ELSE TRIM(remaining) END AS token, CASE WHEN LOCATE(remaining, '|') > 0 THEN SUBSTR(remaining, LOCATE(remaining, '|') + 1) ELSE '' END AS remaining FROM val_list WHERE LENGTH(remaining) > 0 ) SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE LOWER("{COLUMN_NAME}") IN (SELECT token FROM val_list) GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Profile Anomaly
diff --git a/testgen/template/dbsetup_test_types/test_types_Aggregate_Balance.yaml b/testgen/template/dbsetup_test_types/test_types_Aggregate_Balance.yaml
index 3fe5b288..83e0ec45 100644
--- a/testgen/template/dbsetup_test_types/test_types_Aggregate_Balance.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Aggregate_Balance.yaml
@@ -215,6 +215,56 @@ test_types:
ORDER BY {GROUPBY_NAMES}
LIMIT {LIMIT};
error_type: Test Results
+ - id: '8500'
+ test_id: '1500'
+ test_type: Aggregate_Balance
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT *
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) AS total, SUM(MATCH_TOTAL) AS MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} AS total, NULL AS match_total
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL AS total, {MATCH_COLUMN_NAMES} AS match_total
+ FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE total <> match_total OR (total IS NOT NULL AND match_total IS NULL) OR (total IS NULL AND match_total IS NOT NULL)
+ ORDER BY {GROUPBY_NAMES}
+ FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8500'
+ test_id: '1500'
+ test_type: Aggregate_Balance
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT *
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) AS total, SUM(MATCH_TOTAL) AS MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} AS total, NULL AS match_total
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL AS total, {MATCH_COLUMN_NAMES} AS match_total
+ FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE total <> match_total OR (total IS NOT NULL AND match_total IS NULL) OR (total IS NULL AND match_total IS NOT NULL)
+ ORDER BY {GROUPBY_NAMES}
+ LIMIT {LIMIT}
+ error_type: Test Results
test_templates:
- id: '2506'
test_type: Aggregate_Balance
@@ -559,3 +609,91 @@ test_types:
WHERE total <> match_total
OR (total IS NOT NULL AND match_total IS NULL)
OR (total IS NULL AND match_total IS NOT NULL);
+ - id: '8006'
+ test_type: Aggregate_Balance
+ sql_flavor: oracle
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
+ FROM {QUOTE}{MATCH_SCHEMA_NAME}{QUOTE}.{QUOTE}{MATCH_TABLE_NAME}{QUOTE}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE total <> match_total
+ OR (total IS NOT NULL AND match_total IS NULL)
+ OR (total IS NULL AND match_total IS NOT NULL)
+ - id: '8006'
+ test_type: Aggregate_Balance
+ sql_flavor: sap_hana
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
+ FROM {QUOTE}{MATCH_SCHEMA_NAME}{QUOTE}.{QUOTE}{MATCH_TABLE_NAME}{QUOTE}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE total <> match_total
+ OR (total IS NOT NULL AND match_total IS NULL)
+ OR (total IS NULL AND match_total IS NOT NULL)
diff --git a/testgen/template/dbsetup_test_types/test_types_Aggregate_Balance_Percent.yaml b/testgen/template/dbsetup_test_types/test_types_Aggregate_Balance_Percent.yaml
index f5fc0618..59b127bb 100644
--- a/testgen/template/dbsetup_test_types/test_types_Aggregate_Balance_Percent.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Aggregate_Balance_Percent.yaml
@@ -229,6 +229,60 @@ test_types:
ORDER BY {GROUPBY_NAMES}
LIMIT {LIMIT};
error_type: Test Results
+ - id: '8504'
+ test_id: '1504'
+ test_type: Aggregate_Balance_Percent
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT *
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) AS total, SUM(MATCH_TOTAL) AS MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} AS total, NULL AS match_total
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL AS total, {MATCH_COLUMN_NAMES} AS match_total
+ FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE (total IS NOT NULL AND match_total IS NULL)
+ OR (total IS NULL AND match_total IS NOT NULL)
+ OR (total NOT BETWEEN match_total * (1 + {LOWER_TOLERANCE}/100.0) AND match_total * (1 + {UPPER_TOLERANCE}/100.0))
+ ORDER BY {GROUPBY_NAMES}
+ FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8504'
+ test_id: '1504'
+ test_type: Aggregate_Balance_Percent
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT *
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) AS total, SUM(MATCH_TOTAL) AS MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} AS total, NULL AS match_total
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL AS total, {MATCH_COLUMN_NAMES} AS match_total
+ FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE (total IS NOT NULL AND match_total IS NULL)
+ OR (total IS NULL AND match_total IS NOT NULL)
+ OR (total NOT BETWEEN match_total * (1 + {LOWER_TOLERANCE}/100.0) AND match_total * (1 + {UPPER_TOLERANCE}/100.0))
+ ORDER BY {GROUPBY_NAMES}
+ LIMIT {LIMIT}
+ error_type: Test Results
test_templates:
- id: '2509'
test_type: Aggregate_Balance_Percent
@@ -573,3 +627,91 @@ test_types:
WHERE (total IS NOT NULL AND match_total IS NULL)
OR (total IS NULL AND match_total IS NOT NULL)
OR (total NOT BETWEEN match_total * (1 + {LOWER_TOLERANCE}/100.0) AND match_total * (1 + {UPPER_TOLERANCE}/100.0));
+ - id: '8009'
+ test_type: Aggregate_Balance_Percent
+ sql_flavor: oracle
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
+ FROM {QUOTE}{MATCH_SCHEMA_NAME}{QUOTE}.{QUOTE}{MATCH_TABLE_NAME}{QUOTE}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE (total IS NOT NULL AND match_total IS NULL)
+ OR (total IS NULL AND match_total IS NOT NULL)
+ OR (total NOT BETWEEN match_total * (1 + {LOWER_TOLERANCE}/100.0) AND match_total * (1 + {UPPER_TOLERANCE}/100.0))
+ - id: '8009'
+ test_type: Aggregate_Balance_Percent
+ sql_flavor: sap_hana
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
+ FROM {QUOTE}{MATCH_SCHEMA_NAME}{QUOTE}.{QUOTE}{MATCH_TABLE_NAME}{QUOTE}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE (total IS NOT NULL AND match_total IS NULL)
+ OR (total IS NULL AND match_total IS NOT NULL)
+ OR (total NOT BETWEEN match_total * (1 + {LOWER_TOLERANCE}/100.0) AND match_total * (1 + {UPPER_TOLERANCE}/100.0))
diff --git a/testgen/template/dbsetup_test_types/test_types_Aggregate_Balance_Range.yaml b/testgen/template/dbsetup_test_types/test_types_Aggregate_Balance_Range.yaml
index 9d594da4..c868d3cd 100644
--- a/testgen/template/dbsetup_test_types/test_types_Aggregate_Balance_Range.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Aggregate_Balance_Range.yaml
@@ -229,6 +229,60 @@ test_types:
ORDER BY {GROUPBY_NAMES}
LIMIT {LIMIT};
error_type: Test Results
+ - id: '8505'
+ test_id: '1505'
+ test_type: Aggregate_Balance_Range
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT *
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) AS total, SUM(MATCH_TOTAL) AS MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} AS total, NULL AS match_total
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL AS total, {MATCH_COLUMN_NAMES} AS match_total
+ FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE (total IS NOT NULL AND match_total IS NULL)
+ OR (total IS NULL AND match_total IS NOT NULL)
+ OR (total NOT BETWEEN match_total + {LOWER_TOLERANCE} AND match_total + {UPPER_TOLERANCE})
+ ORDER BY {GROUPBY_NAMES}
+ FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8505'
+ test_id: '1505'
+ test_type: Aggregate_Balance_Range
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT *
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) AS total, SUM(MATCH_TOTAL) AS MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} AS total, NULL AS match_total
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL AS total, {MATCH_COLUMN_NAMES} AS match_total
+ FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE (total IS NOT NULL AND match_total IS NULL)
+ OR (total IS NULL AND match_total IS NOT NULL)
+ OR (total NOT BETWEEN match_total + {LOWER_TOLERANCE} AND match_total + {UPPER_TOLERANCE})
+ ORDER BY {GROUPBY_NAMES}
+ LIMIT {LIMIT}
+ error_type: Test Results
test_templates:
- id: '2510'
test_type: Aggregate_Balance_Range
@@ -573,3 +627,91 @@ test_types:
WHERE (total IS NOT NULL AND match_total IS NULL)
OR (total IS NULL AND match_total IS NOT NULL)
OR (total NOT BETWEEN match_total + {LOWER_TOLERANCE} AND match_total + {UPPER_TOLERANCE});
+ - id: '8010'
+ test_type: Aggregate_Balance_Range
+ sql_flavor: oracle
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
+ FROM {QUOTE}{MATCH_SCHEMA_NAME}{QUOTE}.{QUOTE}{MATCH_TABLE_NAME}{QUOTE}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE (total IS NOT NULL AND match_total IS NULL)
+ OR (total IS NULL AND match_total IS NOT NULL)
+ OR (total NOT BETWEEN match_total + {LOWER_TOLERANCE} AND match_total + {UPPER_TOLERANCE})
+ - id: '8010'
+ test_type: Aggregate_Balance_Range
+ sql_flavor: sap_hana
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
+ FROM {QUOTE}{MATCH_SCHEMA_NAME}{QUOTE}.{QUOTE}{MATCH_TABLE_NAME}{QUOTE}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE (total IS NOT NULL AND match_total IS NULL)
+ OR (total IS NULL AND match_total IS NOT NULL)
+ OR (total NOT BETWEEN match_total + {LOWER_TOLERANCE} AND match_total + {UPPER_TOLERANCE})
diff --git a/testgen/template/dbsetup_test_types/test_types_Aggregate_Minimum.yaml b/testgen/template/dbsetup_test_types/test_types_Aggregate_Minimum.yaml
index 676052a2..49e1b39a 100644
--- a/testgen/template/dbsetup_test_types/test_types_Aggregate_Minimum.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Aggregate_Minimum.yaml
@@ -215,6 +215,56 @@ test_types:
ORDER BY {GROUPBY_NAMES}
LIMIT {LIMIT};
error_type: Test Results
+ - id: '8501'
+ test_id: '1501'
+ test_type: Aggregate_Minimum
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT *
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
+ FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE total < match_total OR (total IS NULL AND match_total IS NOT NULL)
+ ORDER BY {GROUPBY_NAMES}
+ FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8501'
+ test_id: '1501'
+ test_type: Aggregate_Minimum
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT *
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
+ FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE total < match_total OR (total IS NULL AND match_total IS NOT NULL)
+ ORDER BY {GROUPBY_NAMES}
+ LIMIT {LIMIT}
+ error_type: Test Results
test_templates:
- id: '2502'
test_type: Aggregate_Minimum
@@ -559,3 +609,91 @@ test_types:
WHERE total < match_total
-- OR (total IS NOT NULL AND match_total IS NULL) -- New categories
OR (total IS NULL AND match_total IS NOT NULL); -- Dropped categories
+ - id: '8002'
+ test_type: Aggregate_Minimum
+ sql_flavor: oracle
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
+ FROM {QUOTE}{MATCH_SCHEMA_NAME}{QUOTE}.{QUOTE}{MATCH_TABLE_NAME}{QUOTE}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE total < match_total
+ -- OR (total IS NOT NULL AND match_total IS NULL) -- New categories
+ OR (total IS NULL AND match_total IS NOT NULL)
+ - id: '8002'
+ test_type: Aggregate_Minimum
+ sql_flavor: sap_hana
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM ( SELECT {GROUPBY_NAMES}, SUM(TOTAL) as total, SUM(MATCH_TOTAL) as MATCH_TOTAL
+ FROM
+ ( SELECT {GROUPBY_NAMES}, {COLUMN_NAME_NO_QUOTES} as total, NULL as match_total
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ {HAVING_CONDITION}
+ UNION ALL
+ SELECT {MATCH_GROUPBY_NAMES}, NULL as total, {MATCH_COLUMN_NAMES} as match_total
+ FROM {QUOTE}{MATCH_SCHEMA_NAME}{QUOTE}.{QUOTE}{MATCH_TABLE_NAME}{QUOTE}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION} ) a
+ GROUP BY {GROUPBY_NAMES} ) s
+ WHERE total < match_total
+ -- OR (total IS NOT NULL AND match_total IS NULL) -- New categories
+ OR (total IS NULL AND match_total IS NOT NULL)
diff --git a/testgen/template/dbsetup_test_types/test_types_Alpha_Trunc.yaml b/testgen/template/dbsetup_test_types/test_types_Alpha_Trunc.yaml
index aa070119..23d43989 100644
--- a/testgen/template/dbsetup_test_types/test_types_Alpha_Trunc.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Alpha_Trunc.yaml
@@ -100,6 +100,22 @@ test_types:
test_operator: <
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8001'
+ test_type: Alpha_Trunc
+ sql_flavor: oracle
+ measure: |-
+ MAX(LENGTH({COLUMN_NAME}))
+ test_operator: <
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8001'
+ test_type: Alpha_Trunc
+ sql_flavor: sap_hana
+ measure: |-
+ MAX(LENGTH({COLUMN_NAME}))
+ test_operator: <
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1364'
test_id: '1004'
@@ -164,4 +180,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}" , LEN("{COLUMN_NAME}") as current_max_length, {THRESHOLD_VALUE} as previous_max_length FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT MAX(LEN("{COLUMN_NAME}")) as max_length FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") a WHERE LEN("{COLUMN_NAME}") = a.max_length AND a.max_length < {THRESHOLD_VALUE} LIMIT {LIMIT};
error_type: Test Results
+ - id: '8001'
+ test_id: '1004'
+ test_type: Alpha_Trunc
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", LENGTH("{COLUMN_NAME}") as current_max_length, {THRESHOLD_VALUE} as previous_max_length FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT MAX(LENGTH("{COLUMN_NAME}")) as max_length FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") a WHERE LENGTH("{COLUMN_NAME}") = a.max_length AND a.max_length < {THRESHOLD_VALUE} FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8001'
+ test_id: '1004'
+ test_type: Alpha_Trunc
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", LENGTH("{COLUMN_NAME}") as current_max_length, {THRESHOLD_VALUE} as previous_max_length FROM "{TARGET_SCHEMA}"."{TABLE_NAME}", (SELECT MAX(LENGTH("{COLUMN_NAME}")) as max_length FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") a WHERE LENGTH("{COLUMN_NAME}") = a.max_length AND a.max_length < {THRESHOLD_VALUE} LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Avg_Shift.yaml b/testgen/template/dbsetup_test_types/test_types_Avg_Shift.yaml
index 367c833c..a224d3b6 100644
--- a/testgen/template/dbsetup_test_types/test_types_Avg_Shift.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Avg_Shift.yaml
@@ -101,6 +101,22 @@ test_types:
test_operator: '>='
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8002'
+ test_type: Avg_Shift
+ sql_flavor: oracle
+ measure: |-
+ ABS( (AVG(CAST({COLUMN_NAME} AS NUMBER)) - {BASELINE_AVG}) / SQRT(((COUNT({COLUMN_NAME})-1)*POWER(STDDEV({COLUMN_NAME}),2) + ({BASELINE_VALUE_CT}-1) * POWER({BASELINE_SD},2)) /NULLIF(COUNT({COLUMN_NAME}) + {BASELINE_VALUE_CT}, 0) ))
+ test_operator: '>='
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8002'
+ test_type: Avg_Shift
+ sql_flavor: sap_hana
+ measure: |-
+ ABS( (AVG(CAST({COLUMN_NAME} AS DECIMAL)) - {BASELINE_AVG}) / SQRT(((COUNT({COLUMN_NAME})-1)*POWER(STDDEV({COLUMN_NAME}),2) + ({BASELINE_VALUE_CT}-1) * POWER({BASELINE_SD},2)) /NULLIF(COUNT({COLUMN_NAME}) + {BASELINE_VALUE_CT}, 0) ))
+ test_operator: '>='
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1365'
test_id: '1005'
@@ -159,4 +175,20 @@ test_types:
lookup_query: |-
SELECT AVG("{COLUMN_NAME}" :: FLOAT) AS current_average FROM "{TARGET_SCHEMA}"."{TABLE_NAME}";
error_type: Test Results
+ - id: '8002'
+ test_id: '1005'
+ test_type: Avg_Shift
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT AVG(CAST("{COLUMN_NAME}" AS NUMBER)) AS current_average FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ error_type: Test Results
+ - id: '8002'
+ test_id: '1005'
+ test_type: Avg_Shift
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT AVG(CAST("{COLUMN_NAME}" AS DECIMAL)) AS current_average FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_CUSTOM.yaml b/testgen/template/dbsetup_test_types/test_types_CUSTOM.yaml
index fbfa7fa1..8e752a67 100644
--- a/testgen/template/dbsetup_test_types/test_types_CUSTOM.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_CUSTOM.yaml
@@ -19,12 +19,12 @@ test_types:
Test Focus
column_name_help: |-
Specify a brief descriptor of the focus of this test that is unique within this Test Suite for the Table and Test Type. This distinguishes this test from others of the same type on the same table. Example: `Order Total Matches Detail` if you are testing that the total in one table matches the sum of lines in another.
- default_parm_columns: custom_query
+ default_parm_columns: custom_query,match_column_names
default_parm_values: null
default_parm_prompts: |-
- Custom SQL Query Returning Error Records
+ Custom SQL Query Returning Error Records,PII Redactable Columns
default_parm_help: |-
- Query should return records indicating one or more errors. The test passes if no records are returned. Results of the query will be shown when you click `Review Source Data` for a failed test, so be sure to include enough data in your results to follow-up. A query can refer to any tables in the database. You must hard-code the schema or use `{DATA_SCHEMA}` to represent the schema defined for the Table Group.
+ Query should return records indicating one or more errors. The test passes if no records are returned. Results of the query will be shown when you click `Review Source Data` for a failed test, so be sure to include enough data in your results to follow up. A query can refer to any tables in the database. You must hard-code the schema or use `{DATA_SCHEMA}` to represent the schema defined for the Table Group.|Comma-separated list of result column names that contain PII data and should be redacted for users without PII viewing permissions. Leave blank if no columns need redacting.
default_severity: Fail
run_type: QUERY
test_scope: custom
@@ -313,3 +313,71 @@ test_types:
FROM (
{CUSTOM_QUERY}
) TEST;
+ - id: '8004'
+ test_type: CUSTOM
+ sql_flavor: oracle
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ CASE
+ WHEN '{COLUMN_NAME_NO_QUOTES}' IS NULL THEN NULL
+ ELSE '{COLUMN_NAME_NO_QUOTES}'
+ END as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ /* TODO: 'custom_query= {CUSTOM_QUERY_ESCAPED}' as input_parameters, */
+ 'Skip_Errors={SKIP_ERRORS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM (
+ {CUSTOM_QUERY}
+ ) TEST
+ - id: '8004'
+ test_type: CUSTOM
+ sql_flavor: sap_hana
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ CASE
+ WHEN '{COLUMN_NAME_NO_QUOTES}' IS NULL THEN NULL
+ ELSE '{COLUMN_NAME_NO_QUOTES}'
+ END as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ /* TODO: 'custom_query= {CUSTOM_QUERY_ESCAPED}' as input_parameters, */
+ 'Skip_Errors={SKIP_ERRORS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM (
+ {CUSTOM_QUERY}
+ ) TEST
diff --git a/testgen/template/dbsetup_test_types/test_types_Combo_Match.yaml b/testgen/template/dbsetup_test_types/test_types_Combo_Match.yaml
index f9dffc4d..18bdde5d 100644
--- a/testgen/template/dbsetup_test_types/test_types_Combo_Match.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Combo_Match.yaml
@@ -192,6 +192,50 @@ test_types:
ORDER BY {COLUMN_NAME_NO_QUOTES}
LIMIT {LIMIT};
error_type: Test Results
+ - id: '8502'
+ test_id: '1502'
+ test_type: Combo_Match
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT *
+ FROM ( SELECT {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {COLUMN_NAME_NO_QUOTES}
+ {HAVING_CONDITION}
+ MINUS
+ SELECT {MATCH_GROUPBY_NAMES}
+ FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION}
+ ) test
+ ORDER BY {COLUMN_NAME_NO_QUOTES}
+ FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8502'
+ test_id: '1502'
+ test_type: Combo_Match
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT *
+ FROM ( SELECT {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {COLUMN_NAME_NO_QUOTES}
+ {HAVING_CONDITION}
+ EXCEPT
+ SELECT {MATCH_GROUPBY_NAMES}
+ FROM {MATCH_SCHEMA_NAME}.{MATCH_TABLE_NAME}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION}
+ ) test
+ ORDER BY {COLUMN_NAME_NO_QUOTES}
+ LIMIT {LIMIT}
+ error_type: Test Results
test_templates:
- id: '2501'
test_type: Combo_Match
@@ -503,3 +547,81 @@ test_types:
GROUP BY {MATCH_GROUPBY_NAMES}
{MATCH_HAVING_CONDITION}
) test;
+ - id: '8001'
+ test_type: Combo_Match
+ sql_flavor: oracle
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM ( SELECT {COLUMN_NAME_NO_QUOTES}
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {COLUMN_NAME_NO_QUOTES}
+ {HAVING_CONDITION}
+ MINUS
+ SELECT {MATCH_GROUPBY_NAMES}
+ FROM {QUOTE}{MATCH_SCHEMA_NAME}{QUOTE}.{QUOTE}{MATCH_TABLE_NAME}{QUOTE}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION}
+ ) test
+ - id: '8001'
+ test_type: Combo_Match
+ sql_flavor: sap_hana
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM ( SELECT {COLUMN_NAME_NO_QUOTES}
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {COLUMN_NAME_NO_QUOTES}
+ {HAVING_CONDITION}
+ EXCEPT
+ SELECT {MATCH_GROUPBY_NAMES}
+ FROM {QUOTE}{MATCH_SCHEMA_NAME}{QUOTE}.{QUOTE}{MATCH_TABLE_NAME}{QUOTE}
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES}
+ {MATCH_HAVING_CONDITION}
+ ) test
diff --git a/testgen/template/dbsetup_test_types/test_types_Condition_Flag.yaml b/testgen/template/dbsetup_test_types/test_types_Condition_Flag.yaml
index 11125999..110b2226 100644
--- a/testgen/template/dbsetup_test_types/test_types_Condition_Flag.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Condition_Flag.yaml
@@ -101,6 +101,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8003'
+ test_type: Condition_Flag
+ sql_flavor: oracle
+ measure: |-
+ SUM(CASE WHEN {CUSTOM_QUERY} THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8003'
+ test_type: Condition_Flag
+ sql_flavor: sap_hana
+ measure: |-
+ SUM(CASE WHEN {CUSTOM_QUERY} THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1366'
test_id: '1006'
@@ -161,4 +177,20 @@ test_types:
lookup_query: |-
SELECT * FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE {CUSTOM_QUERY} LIMIT {LIMIT};
error_type: Test Results
+ - id: '8006'
+ test_id: '1006'
+ test_type: Condition_Flag
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT * FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE {CUSTOM_QUERY} FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8006'
+ test_id: '1006'
+ test_type: Condition_Flag
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT * FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE {CUSTOM_QUERY} LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Constant.yaml b/testgen/template/dbsetup_test_types/test_types_Constant.yaml
index 2bdd1a04..7141bcfa 100644
--- a/testgen/template/dbsetup_test_types/test_types_Constant.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Constant.yaml
@@ -100,6 +100,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8004'
+ test_type: Constant
+ sql_flavor: oracle
+ measure: |-
+ SUM(CASE WHEN {COLUMN_NAME} <> {BASELINE_VALUE} THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8004'
+ test_type: Constant
+ sql_flavor: sap_hana
+ measure: |-
+ SUM(CASE WHEN {COLUMN_NAME} <> {BASELINE_VALUE} THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1367'
test_id: '1007'
@@ -161,4 +177,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" <> {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Test Results
+ - id: '8004'
+ test_id: '1007'
+ test_type: Constant
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" <> {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8004'
+ test_id: '1007'
+ test_type: Constant
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" <> {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Daily_Record_Ct.yaml b/testgen/template/dbsetup_test_types/test_types_Daily_Record_Ct.yaml
index 389bf0af..eeb64f32 100644
--- a/testgen/template/dbsetup_test_types/test_types_Daily_Record_Ct.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Daily_Record_Ct.yaml
@@ -45,30 +45,30 @@ test_types:
sql_flavor: bigquery
measure: |-
DATETIME_DIFF(DATETIME_TRUNC(SAFE_CAST(MAX({COLUMN_NAME}) AS DATE), DAY), DATETIME_TRUNC(SAFE_CAST(MIN({COLUMN_NAME}) AS DATE), DAY), DAY) + 1 - COUNT(DISTINCT DATETIME_TRUNC({COLUMN_NAME}, DAY))
- test_operator: <
+ test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
- id: '6005'
test_type: Daily_Record_Ct
sql_flavor: databricks
measure: |-
- <%DATEDIFF_DAY;MIN({COLUMN_NAME});MAX({COLUMN_NAME})%>+1-COUNT(DISTINCT {COLUMN_NAME})
- test_operator: <
+ <%DATEDIFF_DAY;MIN({COLUMN_NAME});MAX({COLUMN_NAME})%>+1-COUNT(DISTINCT CAST({COLUMN_NAME} AS DATE))
+ test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
- id: '3005'
test_type: Daily_Record_Ct
sql_flavor: mssql
measure: |-
- DATEDIFF(day, MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT {COLUMN_NAME})
- test_operator: <
+ DATEDIFF(day, MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT CAST({COLUMN_NAME} AS DATE))
+ test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
- id: '4005'
test_type: Daily_Record_Ct
sql_flavor: postgresql
measure: |-
- <%DATEDIFF_DAY;MIN({COLUMN_NAME});MAX({COLUMN_NAME})%>+1-COUNT(DISTINCT {COLUMN_NAME})
+ <%DATEDIFF_DAY;MIN({COLUMN_NAME});MAX({COLUMN_NAME})%>+1-COUNT(DISTINCT CAST({COLUMN_NAME} AS DATE))
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -76,7 +76,7 @@ test_types:
test_type: Daily_Record_Ct
sql_flavor: redshift
measure: |-
- DATEDIFF('DAY', MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT {COLUMN_NAME})
+ DATEDIFF('DAY', MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT CAST({COLUMN_NAME} AS DATE))
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -84,7 +84,7 @@ test_types:
test_type: Daily_Record_Ct
sql_flavor: redshift_spectrum
measure: |-
- DATEDIFF('DAY', MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT {COLUMN_NAME})
+ DATEDIFF('DAY', MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT CAST({COLUMN_NAME} AS DATE))
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -92,15 +92,31 @@ test_types:
test_type: Daily_Record_Ct
sql_flavor: snowflake
measure: |-
- DATEDIFF(day, MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT {COLUMN_NAME})
- test_operator: <
+ DATEDIFF(day, MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT CAST({COLUMN_NAME} AS DATE))
+ test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
- id: '5005'
test_type: Daily_Record_Ct
sql_flavor: trino
measure: |-
- DATE_DIFF('DAY', MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT {COLUMN_NAME})
+ DATE_DIFF('DAY', MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT CAST({COLUMN_NAME} AS DATE))
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8005'
+ test_type: Daily_Record_Ct
+ sql_flavor: oracle
+ measure: |-
+ <%DATEDIFF_DAY;MIN({COLUMN_NAME});MAX({COLUMN_NAME})%>+1-COUNT(DISTINCT TRUNC({COLUMN_NAME}))
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8005'
+ test_type: Daily_Record_Ct
+ sql_flavor: sap_hana
+ measure: |-
+ <%DATEDIFF_DAY;MIN({COLUMN_NAME});MAX({COLUMN_NAME})%>+1-COUNT(DISTINCT CAST({COLUMN_NAME} AS DATE))
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -150,7 +166,7 @@ test_types:
sql_flavor: databricks
lookup_type: null
lookup_query: |-
- WITH date_bounds AS( SELECT MIN(`{COLUMN_NAME}`) AS min_date, MAX(`{COLUMN_NAME}`) AS max_date FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`), all_dates AS ( SELECT EXPLODE(SEQUENCE(min_date, max_date, INTERVAL 1 DAY)) AS all_dates FROM date_bounds ), existing_periods AS ( SELECT DISTINCT CAST(`{COLUMN_NAME}` AS DATE) AS period, COUNT(1) AS period_count FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` GROUP BY CAST(`{COLUMN_NAME}` AS DATE) ), missing_dates AS ( SELECT d.all_dates AS missing_period FROM all_dates d LEFT JOIN existing_periods e ON d.all_dates = e.period WHERE e.period IS NULL ) SELECT m.missing_period, MAX(e1.period) AS prior_available_date, MAX(e1.period_count) AS prior_available_date_count, MIN(e2.period) AS next_available_date, MAX(e2.period_count) AS next_available_date_count FROM missing_dates m LEFT JOIN existing_periods e1 ON e1.period < m.missing_period LEFT JOIN existing_periods e2 ON e2.period > m.missing_period GROUP BY m.missing_period ORDER BY m.missing_period LIMIT {LIMIT};
+ WITH date_bounds AS( SELECT CAST(MIN(`{COLUMN_NAME}`) AS DATE) AS min_date, CAST(MAX(`{COLUMN_NAME}`) AS DATE) AS max_date FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`), all_dates AS ( SELECT EXPLODE(SEQUENCE(min_date, max_date, INTERVAL 1 DAY)) AS all_dates FROM date_bounds ), existing_periods AS ( SELECT DISTINCT CAST(`{COLUMN_NAME}` AS DATE) AS period, COUNT(1) AS period_count FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` GROUP BY CAST(`{COLUMN_NAME}` AS DATE) ), missing_dates AS ( SELECT d.all_dates AS missing_period FROM all_dates d LEFT JOIN existing_periods e ON d.all_dates = e.period WHERE e.period IS NULL ) SELECT m.missing_period, MAX(e1.period) AS prior_available_date, MAX(e1.period_count) AS prior_available_date_count, MIN(e2.period) AS next_available_date, MAX(e2.period_count) AS next_available_date_count FROM missing_dates m LEFT JOIN existing_periods e1 ON e1.period < m.missing_period LEFT JOIN existing_periods e2 ON e2.period > m.missing_period GROUP BY m.missing_period ORDER BY m.missing_period LIMIT {LIMIT};
error_type: Test Results
- id: '1144'
test_id: '1009'
@@ -189,15 +205,13 @@ test_types:
FROM check_periods c
LEFT JOIN data_by_period d
ON (c.check_period = d.data_period) )
- SELECT TOP {LIMIT} check_period, record_ct,
+ SELECT TOP {LIMIT} check_period AS missing_period, record_ct,
CASE
WHEN record_ct = 0 THEN 'MISSING'
ELSE 'Present'
END as status
FROM data_by_prd_with_prior_next
WHERE record_ct = 0
- OR last_record_ct = 0
- OR next_record_ct = 0
ORDER BY check_period DESC;
error_type: Test Results
- id: '1087'
@@ -232,4 +246,20 @@ test_types:
lookup_query: |-
WITH RECURSIVE daterange(all_dates) AS (SELECT MIN("{COLUMN_NAME}") :: DATE AS all_dates FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" UNION ALL SELECT DATEADD(DAY, 1, d.all_dates) :: DATE AS all_dates FROM daterange d WHERE d.all_dates < (SELECT MAX("{COLUMN_NAME}") :: DATE FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") ), existing_periods AS ( SELECT DISTINCT "{COLUMN_NAME}" :: DATE AS period, COUNT(1) AS period_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" :: DATE ) SELECT p.missing_period, p.prior_available_date, e.period_count as prior_available_date_count, p.next_available_date, f.period_count as next_available_date_count FROM (SELECT d.all_dates AS missing_period, MAX(b.period) AS prior_available_date, MIN(c.period) AS next_available_date FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates) p LEFT JOIN existing_periods e ON (p.prior_available_date = e.period) LEFT JOIN existing_periods f ON (p.next_available_date = f.period) ORDER BY p.missing_period LIMIT {LIMIT};
error_type: Test Results
+ - id: '8009'
+ test_id: '1009'
+ test_type: Daily_Record_Ct
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ WITH daterange AS (SELECT (SELECT MIN(TRUNC("{COLUMN_NAME}")) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") + LEVEL - 1 AS all_dates FROM DUAL CONNECT BY LEVEL <= (SELECT MAX(TRUNC("{COLUMN_NAME}")) - MIN(TRUNC("{COLUMN_NAME}")) + 1 FROM "{TARGET_SCHEMA}"."{TABLE_NAME}")), existing_periods AS (SELECT DISTINCT TRUNC("{COLUMN_NAME}") AS period, COUNT(1) AS period_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY TRUNC("{COLUMN_NAME}")) SELECT p.missing_period, p.prior_available_date, e.period_count AS prior_available_date_count, p.next_available_date, f.period_count AS next_available_date_count FROM (SELECT d.all_dates AS missing_period, MAX(b.period) AS prior_available_date, MIN(c.period) AS next_available_date FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates) p LEFT JOIN existing_periods e ON (p.prior_available_date = e.period) LEFT JOIN existing_periods f ON (p.next_available_date = f.period) ORDER BY p.missing_period FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8009'
+ test_id: '1009'
+ test_type: Daily_Record_Ct
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ WITH Pass0 AS (SELECT 1 C FROM DUMMY UNION ALL SELECT 1 FROM DUMMY), Pass1 AS (SELECT 1 C FROM Pass0 A, Pass0 B), Pass2 AS (SELECT 1 C FROM Pass1 A, Pass1 B), Pass3 AS (SELECT 1 C FROM Pass2 A, Pass2 B), Pass4 AS (SELECT 1 C FROM Pass3 A, Pass3 B), nums AS (SELECT ROW_NUMBER() OVER (ORDER BY C) - 1 AS rn FROM Pass4), bounds AS (SELECT MIN(CAST("{COLUMN_NAME}" AS DATE)) AS min_date, MAX(CAST("{COLUMN_NAME}" AS DATE)) AS max_date FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), daterange AS (SELECT ADD_DAYS(b.min_date, n.rn) AS all_dates FROM bounds b, nums n WHERE ADD_DAYS(b.min_date, n.rn) <= b.max_date), existing_periods AS (SELECT DISTINCT CAST("{COLUMN_NAME}" AS DATE) AS period, COUNT(1) AS period_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY CAST("{COLUMN_NAME}" AS DATE)) SELECT p.missing_period, p.prior_available_date, e.period_count AS prior_available_date_count, p.next_available_date, f.period_count AS next_available_date_count FROM (SELECT d.all_dates AS missing_period, MAX(b.period) AS prior_available_date, MIN(c.period) AS next_available_date FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates) p LEFT JOIN existing_periods e ON (p.prior_available_date = e.period) LEFT JOIN existing_periods f ON (p.next_available_date = f.period) ORDER BY p.missing_period LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Dec_Trunc.yaml b/testgen/template/dbsetup_test_types/test_types_Dec_Trunc.yaml
index 02fe0dda..ac988b64 100644
--- a/testgen/template/dbsetup_test_types/test_types_Dec_Trunc.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Dec_Trunc.yaml
@@ -101,6 +101,22 @@ test_types:
test_operator: <
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8006'
+ test_type: Dec_Trunc
+ sql_flavor: oracle
+ measure: |-
+ SUM(ROUND(ABS(MOD({COLUMN_NAME}, 1)), 5))+1
+ test_operator: <
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8006'
+ test_type: Dec_Trunc
+ sql_flavor: sap_hana
+ measure: |-
+ SUM(ROUND(ABS(MOD({COLUMN_NAME}, 1)), 5))+1
+ test_operator: <
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1369'
test_id: '1011'
@@ -166,4 +182,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT LENGTH(SPLIT_PART("{COLUMN_NAME}" :: TEXT, '.', 2)) AS decimal_scale, COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY decimal_scale LIMIT {LIMIT};
error_type: Test Results
+ - id: '8006'
+ test_id: '1011'
+ test_type: Dec_Trunc
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT CASE WHEN INSTR(TO_CHAR("{COLUMN_NAME}"), '.') > 0 THEN LENGTH(SUBSTR(TO_CHAR("{COLUMN_NAME}"), INSTR(TO_CHAR("{COLUMN_NAME}"), '.') + 1)) ELSE 0 END AS decimal_scale, COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY CASE WHEN INSTR(TO_CHAR("{COLUMN_NAME}"), '.') > 0 THEN LENGTH(SUBSTR(TO_CHAR("{COLUMN_NAME}"), INSTR(TO_CHAR("{COLUMN_NAME}"), '.') + 1)) ELSE 0 END FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8006'
+ test_id: '1011'
+ test_type: Dec_Trunc
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT CASE WHEN LOCATE(TO_VARCHAR("{COLUMN_NAME}"), '.') > 0 THEN LENGTH(SUBSTR(TO_VARCHAR("{COLUMN_NAME}"), LOCATE(TO_VARCHAR("{COLUMN_NAME}"), '.') + 1)) ELSE 0 END AS decimal_scale, COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY CASE WHEN LOCATE(TO_VARCHAR("{COLUMN_NAME}"), '.') > 0 THEN LENGTH(SUBSTR(TO_VARCHAR("{COLUMN_NAME}"), LOCATE(TO_VARCHAR("{COLUMN_NAME}"), '.') + 1)) ELSE 0 END LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Distinct_Date_Ct.yaml b/testgen/template/dbsetup_test_types/test_types_Distinct_Date_Ct.yaml
index 54be295e..1a9d8c82 100644
--- a/testgen/template/dbsetup_test_types/test_types_Distinct_Date_Ct.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Distinct_Date_Ct.yaml
@@ -101,6 +101,22 @@ test_types:
test_operator: <
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8007'
+ test_type: Distinct_Date_Ct
+ sql_flavor: oracle
+ measure: |-
+ COUNT(DISTINCT {COLUMN_NAME})
+ test_operator: <
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8007'
+ test_type: Distinct_Date_Ct
+ sql_flavor: sap_hana
+ measure: |-
+ COUNT(DISTINCT {COLUMN_NAME})
+ test_operator: <
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1370'
test_id: '1012'
@@ -163,4 +179,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT {LIMIT};
error_type: Test Results
+ - id: '8012'
+ test_id: '1012'
+ test_type: Distinct_Date_Ct
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8012'
+ test_id: '1012'
+ test_type: Distinct_Date_Ct
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Distinct_Value_Ct.yaml b/testgen/template/dbsetup_test_types/test_types_Distinct_Value_Ct.yaml
index 150289ab..ea1195ec 100644
--- a/testgen/template/dbsetup_test_types/test_types_Distinct_Value_Ct.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Distinct_Value_Ct.yaml
@@ -100,6 +100,22 @@ test_types:
test_operator: <>
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8008'
+ test_type: Distinct_Value_Ct
+ sql_flavor: oracle
+ measure: |-
+ COUNT(DISTINCT {COLUMN_NAME})
+ test_operator: <>
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8008'
+ test_type: Distinct_Value_Ct
+ sql_flavor: sap_hana
+ measure: |-
+ COUNT(DISTINCT {COLUMN_NAME})
+ test_operator: <>
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1371'
test_id: '1013'
@@ -162,4 +178,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT {LIMIT};
error_type: Test Results
+ - id: '8008'
+ test_id: '1013'
+ test_type: Distinct_Value_Ct
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8008'
+ test_id: '1013'
+ test_type: Distinct_Value_Ct
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NOT NULL GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Distribution_Shift.yaml b/testgen/template/dbsetup_test_types/test_types_Distribution_Shift.yaml
index b44fcd2d..6823fc52 100644
--- a/testgen/template/dbsetup_test_types/test_types_Distribution_Shift.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Distribution_Shift.yaml
@@ -44,6 +44,7 @@ test_types:
test_type: Distribution_Shift
sql_flavor: bigquery
lookup_type: null
+ lookup_redactable_columns: category
lookup_query: |-
WITH latest_ver AS (
SELECT {CONCAT_COLUMNS} AS category,
@@ -61,6 +62,7 @@ test_types:
test_type: Distribution_Shift
sql_flavor: databricks
lookup_type: null
+ lookup_redactable_columns: category
lookup_query: |-
WITH latest_ver
AS ( SELECT {CONCAT_COLUMNS} as category,
@@ -88,6 +90,7 @@ test_types:
test_type: Distribution_Shift
sql_flavor: mssql
lookup_type: null
+ lookup_redactable_columns: category
lookup_query: |-
WITH latest_ver
AS ( SELECT {CONCAT_COLUMNS} as category,
@@ -114,6 +117,7 @@ test_types:
test_type: Distribution_Shift
sql_flavor: postgresql
lookup_type: null
+ lookup_redactable_columns: category
lookup_query: |-
WITH latest_ver
AS ( SELECT {CONCAT_COLUMNS} as category,
@@ -141,6 +145,7 @@ test_types:
test_type: Distribution_Shift
sql_flavor: redshift
lookup_type: null
+ lookup_redactable_columns: category
lookup_query: |-
WITH latest_ver
AS ( SELECT {CONCAT_COLUMNS} as category,
@@ -168,6 +173,7 @@ test_types:
test_type: Distribution_Shift
sql_flavor: redshift_spectrum
lookup_type: null
+ lookup_redactable_columns: category
lookup_query: |-
WITH latest_ver
AS ( SELECT {CONCAT_COLUMNS} as category,
@@ -195,6 +201,7 @@ test_types:
test_type: Distribution_Shift
sql_flavor: snowflake
lookup_type: null
+ lookup_redactable_columns: category
lookup_query: |-
WITH latest_ver
AS ( SELECT {CONCAT_COLUMNS} as category,
@@ -217,6 +224,62 @@ test_types:
ORDER BY COALESCE(l.category, o.category)
LIMIT {LIMIT};
error_type: Test Results
+ - id: '8503'
+ test_id: '1503'
+ test_type: Distribution_Shift
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_redactable_columns: category
+ lookup_query: |-
+ WITH latest_ver
+ AS ( SELECT {CONCAT_COLUMNS} as category,
+ CAST(COUNT(*) AS NUMBER) / CAST(SUM(COUNT(*)) OVER () AS NUMBER) AS pct_of_total
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" v1
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {COLUMN_NAME_NO_QUOTES} ),
+ older_ver
+ AS ( SELECT {CONCAT_MATCH_GROUPBY} as category,
+ CAST(COUNT(*) AS NUMBER) / CAST(SUM(COUNT(*)) OVER () AS NUMBER) AS pct_of_total
+ FROM {MATCH_SCHEMA_NAME}.{TABLE_NAME} v2
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES} )
+ SELECT COALESCE(l.category, o.category) AS category,
+ o.pct_of_total AS old_pct,
+ l.pct_of_total AS new_pct
+ FROM latest_ver l
+ FULL JOIN older_ver o
+ ON (l.category = o.category)
+ ORDER BY COALESCE(l.category, o.category)
+ FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8503'
+ test_id: '1503'
+ test_type: Distribution_Shift
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_redactable_columns: category
+ lookup_query: |-
+ WITH latest_ver
+ AS ( SELECT {CONCAT_COLUMNS} as category,
+ CAST(COUNT(*) AS DECIMAL) / CAST(SUM(COUNT(*)) OVER () AS DECIMAL) AS pct_of_total
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" v1
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {COLUMN_NAME_NO_QUOTES} ),
+ older_ver
+ AS ( SELECT {CONCAT_MATCH_GROUPBY} as category,
+ CAST(COUNT(*) AS DECIMAL) / CAST(SUM(COUNT(*)) OVER () AS DECIMAL) AS pct_of_total
+ FROM {MATCH_SCHEMA_NAME}.{TABLE_NAME} v2
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES} )
+ SELECT COALESCE(l.category, o.category) AS category,
+ o.pct_of_total AS old_pct,
+ l.pct_of_total AS new_pct
+ FROM latest_ver l
+ FULL JOIN older_ver o
+ ON (l.category = o.category)
+ ORDER BY COALESCE(l.category, o.category)
+ LIMIT {LIMIT}
+ error_type: Test Results
test_templates:
- id: '2503'
test_type: Distribution_Shift
@@ -590,3 +653,105 @@ test_types:
SELECT 0.5 * ABS(SUM(new_pct * LN(new_pct/avg_pct)/LN(2)))
+ 0.5 * ABS(SUM(old_pct * LN(old_pct/avg_pct)/LN(2))) as js_divergence
FROM dataset ) rslt;
+ - id: '8003'
+ test_type: Distribution_Shift
+ sql_flavor: oracle
+ template: |-
+ -- Relative Entropy: measured by Jensen-Shannon Divergence
+ -- Smoothed and normalized version of KL divergence,
+ -- with scores between 0 (identical) and 1 (maximally different),
+ -- when using the base-2 logarithm. Formula is:
+ -- 0.5 * kl_divergence(p, m) + 0.5 * kl_divergence(q, m)
+ -- Log base 2 of x = LN(x)/LN(2)
+ WITH latest_ver
+ AS ( SELECT {CONCAT_COLUMNS} as category,
+ CAST(COUNT(*) AS NUMBER) / CAST(SUM(COUNT(*)) OVER () AS NUMBER) AS pct_of_total
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE} v1
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {COLUMN_NAME_NO_QUOTES} ),
+ older_ver
+ AS ( SELECT {CONCAT_MATCH_GROUPBY} as category,
+ CAST(COUNT(*) AS NUMBER) / CAST(SUM(COUNT(*)) OVER () AS NUMBER) AS pct_of_total
+ FROM {QUOTE}{MATCH_SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE} v2
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES} ),
+ dataset
+ AS ( SELECT COALESCE(l.category, o.category) AS category,
+ COALESCE(o.pct_of_total, 0.0000001) AS old_pct,
+ COALESCE(l.pct_of_total, 0.0000001) AS new_pct,
+ (COALESCE(o.pct_of_total, 0.0000001)
+ + COALESCE(l.pct_of_total, 0.0000001))/2.0 AS avg_pct
+ FROM latest_ver l
+ FULL JOIN older_ver o
+ ON (l.category = o.category) )
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ -- '{GROUPBY_NAMES}' as column_names,
+ '{THRESHOLD_VALUE}' as threshold_value,
+ NULL as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN js_divergence > {THRESHOLD_VALUE} THEN 0 ELSE 1 END as result_code,
+ 'Divergence Level: ' || CAST(js_divergence AS {VARCHAR_TYPE}) || ', Threshold: {THRESHOLD_VALUE}.' as result_message,
+ js_divergence as result_measure
+ FROM (
+ SELECT 0.5 * ABS(SUM(new_pct * LN(new_pct/avg_pct)/LN(2)))
+ + 0.5 * ABS(SUM(old_pct * LN(old_pct/avg_pct)/LN(2))) as js_divergence
+ FROM dataset ) rslt
+ - id: '8003'
+ test_type: Distribution_Shift
+ sql_flavor: sap_hana
+ template: |-
+ -- Relative Entropy: measured by Jensen-Shannon Divergence
+ -- Smoothed and normalized version of KL divergence,
+ -- with scores between 0 (identical) and 1 (maximally different),
+ -- when using the base-2 logarithm. Formula is:
+ -- 0.5 * kl_divergence(p, m) + 0.5 * kl_divergence(q, m)
+ -- Log base 2 of x = LN(x)/LN(2)
+ WITH latest_ver
+ AS ( SELECT {CONCAT_COLUMNS} as category,
+ CAST(COUNT(*) AS DECIMAL) / CAST(SUM(COUNT(*)) OVER () AS DECIMAL) AS pct_of_total
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE} v1
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {COLUMN_NAME_NO_QUOTES} ),
+ older_ver
+ AS ( SELECT {CONCAT_MATCH_GROUPBY} as category,
+ CAST(COUNT(*) AS DECIMAL) / CAST(SUM(COUNT(*)) OVER () AS DECIMAL) AS pct_of_total
+ FROM {QUOTE}{MATCH_SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE} v2
+ WHERE {MATCH_SUBSET_CONDITION}
+ GROUP BY {MATCH_GROUPBY_NAMES} ),
+ dataset
+ AS ( SELECT COALESCE(l.category, o.category) AS category,
+ COALESCE(o.pct_of_total, 0.0000001) AS old_pct,
+ COALESCE(l.pct_of_total, 0.0000001) AS new_pct,
+ (COALESCE(o.pct_of_total, 0.0000001)
+ + COALESCE(l.pct_of_total, 0.0000001))/2.0 AS avg_pct
+ FROM latest_ver l
+ FULL JOIN older_ver o
+ ON (l.category = o.category) )
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ -- '{GROUPBY_NAMES}' as column_names,
+ '{THRESHOLD_VALUE}' as threshold_value,
+ NULL as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN js_divergence > {THRESHOLD_VALUE} THEN 0 ELSE 1 END as result_code,
+ 'Divergence Level: ' || CAST(js_divergence AS {VARCHAR_TYPE}) || ', Threshold: {THRESHOLD_VALUE}.' as result_message,
+ js_divergence as result_measure
+ FROM (
+ SELECT 0.5 * ABS(SUM(new_pct * LN(new_pct/avg_pct)/LN(2)))
+ + 0.5 * ABS(SUM(old_pct * LN(old_pct/avg_pct)/LN(2))) as js_divergence
+ FROM dataset ) rslt
diff --git a/testgen/template/dbsetup_test_types/test_types_Dupe_Rows.yaml b/testgen/template/dbsetup_test_types/test_types_Dupe_Rows.yaml
index 480988a5..138abb10 100644
--- a/testgen/template/dbsetup_test_types/test_types_Dupe_Rows.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Dupe_Rows.yaml
@@ -76,7 +76,7 @@ test_types:
SELECT TOP {LIMIT} {GROUPBY_NAMES}, COUNT(*) as record_ct
FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE {SUBSET_CONDITION}
- GROUP BY {GROUPBY_NAMES}
+ GROUP BY {GROUPBY_NAMES}, BINARY_CHECKSUM({GROUPBY_NAMES})
HAVING COUNT(*) > 1
ORDER BY {GROUPBY_NAMES}
error_type: Test Results
@@ -136,6 +136,34 @@ test_types:
ORDER BY {GROUPBY_NAMES}
LIMIT {LIMIT};
error_type: Test Results
+ - id: '8510'
+ test_id: '1510'
+ test_type: Dupe_Rows
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT {GROUPBY_NAMES}, COUNT(*) as record_ct
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ HAVING COUNT(*) > 1
+ ORDER BY {GROUPBY_NAMES}
+ FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8510'
+ test_id: '1510'
+ test_type: Dupe_Rows
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT {GROUPBY_NAMES}, COUNT(*) as record_ct
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ HAVING COUNT(*) > 1
+ ORDER BY {GROUPBY_NAMES}
+ LIMIT {LIMIT}
+ error_type: Test Results
test_templates:
- id: '2511'
test_type: Dupe_Rows
@@ -248,7 +276,8 @@ test_types:
FROM ( SELECT {GROUPBY_NAMES}, COUNT(*) as record_ct
FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
WHERE {SUBSET_CONDITION}
- GROUP BY {GROUPBY_NAMES}
+ -- Checksum is added because string column grouping is case insensitive
+ GROUP BY {GROUPBY_NAMES}, BINARY_CHECKSUM({GROUPBY_NAMES})
HAVING COUNT(*) > 1
) test;
- id: '2311'
@@ -403,3 +432,69 @@ test_types:
GROUP BY {GROUPBY_NAMES}
HAVING COUNT(*) > 1
) test;
+ - id: '8011'
+ test_type: Dupe_Rows
+ sql_flavor: oracle
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' duplicate row(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COALESCE(SUM(record_ct), 0) as result_measure
+ FROM ( SELECT {GROUPBY_NAMES}, COUNT(*) as record_ct
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ HAVING COUNT(*) > 1
+ ) test
+ - id: '8011'
+ test_type: Dupe_Rows
+ sql_flavor: sap_hana
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS {VARCHAR_TYPE}) || ' duplicate row(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COALESCE(SUM(record_ct), 0) as result_measure
+ FROM ( SELECT {GROUPBY_NAMES}, COUNT(*) as record_ct
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ GROUP BY {GROUPBY_NAMES}
+ HAVING COUNT(*) > 1
+ ) test
diff --git a/testgen/template/dbsetup_test_types/test_types_Email_Format.yaml b/testgen/template/dbsetup_test_types/test_types_Email_Format.yaml
index 1ec48c42..1d49d881 100644
--- a/testgen/template/dbsetup_test_types/test_types_Email_Format.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Email_Format.yaml
@@ -100,6 +100,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8009'
+ test_type: Email_Format
+ sql_flavor: oracle
+ measure: |-
+ SUM(CASE WHEN NOT REGEXP_LIKE({COLUMN_NAME}, '^[A-Za-z0-9._''%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$') THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8009'
+ test_type: Email_Format
+ sql_flavor: sap_hana
+ measure: |-
+      SUM(CASE WHEN {COLUMN_NAME} NOT LIKE_REGEXPR '^[A-Za-z0-9._''%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$' THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1372'
test_id: '1014'
@@ -109,7 +125,7 @@ test_types:
lookup_query: |-
SELECT `{COLUMN_NAME}`, COUNT(*) AS count
FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`
- WHERE NOT REGEXP_CONTAINS(CAST(`{COLUMN_NAME}` AS STRING), r'^[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}$')
+ WHERE NOT REGEXP_CONTAINS(CAST(`{COLUMN_NAME}` AS STRING), r'^[A-Za-z0-9._%+-]+@(?:[A-Za-z0-9-]+[.])+[A-Za-z]{2,}$')
GROUP BY `{COLUMN_NAME}`
LIMIT {LIMIT};
error_type: Test Results
@@ -119,7 +135,7 @@ test_types:
sql_flavor: databricks
lookup_type: null
lookup_query: |-
- SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` WHERE REGEXP_LIKE(`{COLUMN_NAME}`::STRING, '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$') != 1 GROUP BY `{COLUMN_NAME}` LIMIT {LIMIT};
+ SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` WHERE NOT REGEXP_LIKE(`{COLUMN_NAME}`::STRING, '^[A-Za-z0-9._''%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$') GROUP BY `{COLUMN_NAME}` LIMIT {LIMIT};
error_type: Test Results
- id: '1148'
test_id: '1014'
@@ -135,7 +151,7 @@ test_types:
sql_flavor: postgresql
lookup_type: null
lookup_query: |-
- SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" !~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$' GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT};
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" !~ '^[A-Za-z0-9._''%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$' GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Test Results
- id: '1009'
test_id: '1014'
@@ -159,6 +175,22 @@ test_types:
sql_flavor: snowflake
lookup_type: null
lookup_query: |-
- SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE REGEXP_LIKE("{COLUMN_NAME}"::VARCHAR, '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$') != 1 GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT};
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE NOT REGEXP_LIKE("{COLUMN_NAME}"::VARCHAR, '^[A-Za-z0-9._''%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$') GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT};
+ error_type: Test Results
+ - id: '8009'
+ test_id: '1014'
+ test_type: Email_Format
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE NOT REGEXP_LIKE("{COLUMN_NAME}", '^[A-Za-z0-9._''%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$') GROUP BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8009'
+ test_id: '1014'
+ test_type: Email_Format
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+      SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" NOT LIKE_REGEXPR '^[A-Za-z0-9._''%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$' GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT}
error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Freshness_Trend.yaml b/testgen/template/dbsetup_test_types/test_types_Freshness_Trend.yaml
index 0cfeecf7..e151fa6c 100644
--- a/testgen/template/dbsetup_test_types/test_types_Freshness_Trend.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Freshness_Trend.yaml
@@ -24,6 +24,7 @@ test_types:
Record Subset Condition,Lower Bound,Upper Bound,History Lookback
default_parm_help: |-
Condition defining a subset of records in main table
+ default_parm_required: N,N,N,N
default_severity: Fail
run_type: QUERY
test_scope: table
@@ -45,7 +46,7 @@ test_types:
template: |-
WITH test_data AS (
SELECT
- {CUSTOM_QUERY} AS fingerprint,
+ TO_HEX(MD5({CUSTOM_QUERY})) AS fingerprint,
DATETIME_DIFF(DATETIME('{RUN_DATE}'), DATETIME(NULLIF('{BASELINE_SUM}', '')), MINUTE) AS interval_minutes
FROM `{SCHEMA_NAME}.{TABLE_NAME}`
WHERE {SUBSET_CONDITION}
@@ -95,7 +96,7 @@ test_types:
template: |-
WITH test_data AS (
SELECT
- {CUSTOM_QUERY} AS fingerprint,
+ MD5({CUSTOM_QUERY}) AS fingerprint,
DATEDIFF(MINUTE, TO_TIMESTAMP(NULLIF('{BASELINE_SUM}', '')), TIMESTAMP '{RUN_DATE}') AS interval_minutes
FROM `{SCHEMA_NAME}`.`{TABLE_NAME}`
WHERE {SUBSET_CONDITION}
@@ -145,7 +146,7 @@ test_types:
template: |-
WITH test_data AS (
SELECT
- {CUSTOM_QUERY} AS fingerprint,
+ LOWER(CONVERT(VARCHAR(40), HASHBYTES('MD5', CAST({CUSTOM_QUERY} AS VARCHAR(MAX))), 2)) AS fingerprint,
DATEDIFF(MINUTE, CAST(NULLIF('{BASELINE_SUM}', '') AS DATETIME2), CAST('{RUN_DATE}' AS DATETIME2)) AS interval_minutes
FROM "{SCHEMA_NAME}"."{TABLE_NAME}" WITH (NOLOCK)
WHERE {SUBSET_CONDITION}
@@ -195,7 +196,7 @@ test_types:
template: |-
WITH test_data AS (
SELECT
- {CUSTOM_QUERY} AS fingerprint,
+ MD5({CUSTOM_QUERY}) AS fingerprint,
(EXTRACT(EPOCH FROM ('{RUN_DATE}'::TIMESTAMP - NULLIF('{BASELINE_SUM}', '')::TIMESTAMP)) / 60)::INTEGER AS interval_minutes
FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
WHERE {SUBSET_CONDITION}
@@ -245,7 +246,7 @@ test_types:
template: |-
WITH test_data AS (
SELECT
- {CUSTOM_QUERY} AS fingerprint,
+ MD5({CUSTOM_QUERY}) AS fingerprint,
DATEDIFF(MINUTE, NULLIF('{BASELINE_SUM}', '')::TIMESTAMP, '{RUN_DATE}'::TIMESTAMP) AS interval_minutes
FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
WHERE {SUBSET_CONDITION}
@@ -295,7 +296,7 @@ test_types:
template: |-
WITH test_data AS (
SELECT
- {CUSTOM_QUERY} AS fingerprint,
+ MD5({CUSTOM_QUERY}) AS fingerprint,
DATEDIFF(MINUTE, NULLIF('{BASELINE_SUM}', '')::TIMESTAMP, '{RUN_DATE}'::TIMESTAMP) AS interval_minutes
FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
WHERE {SUBSET_CONDITION}
@@ -345,7 +346,7 @@ test_types:
template: |-
WITH test_data AS (
SELECT
- {CUSTOM_QUERY} AS fingerprint,
+ MD5({CUSTOM_QUERY}) AS fingerprint,
DATEDIFF(MINUTE, NULLIF('{BASELINE_SUM}', '')::TIMESTAMP, '{RUN_DATE}'::TIMESTAMP) AS interval_minutes
FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
WHERE {SUBSET_CONDITION}
@@ -389,3 +390,104 @@ test_types:
ELSE COALESCE(interval_minutes::VARCHAR, 'Unknown')
END AS result_signal
FROM test_data;
+ - id: '2817'
+ test_type: Freshness_Trend
+ sql_flavor: oracle
+ template: |-
+ WITH test_data AS (
+ SELECT
+ LOWER(RAWTOHEX(STANDARD_HASH({CUSTOM_QUERY}, 'MD5'))) AS fingerprint,
+ ROUND((CAST(TO_TIMESTAMP('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS') AS DATE) -
+ CAST(TO_TIMESTAMP('{BASELINE_SUM}', 'YYYY-MM-DD HH24:MI:SS') AS DATE)) * 24 * 60) AS interval_minutes
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ )
+ SELECT '{TEST_TYPE}' AS test_type,
+ '{TEST_DEFINITION_ID}' AS test_definition_id,
+ '{TEST_SUITE_ID}' AS test_suite_id,
+ '{TEST_RUN_ID}' AS test_run_id,
+ '{RUN_DATE}' AS test_time,
+ '{SCHEMA_NAME}' AS schema_name,
+ '{TABLE_NAME}' AS table_name,
+ '{COLUMN_NAME_NO_QUOTES}' AS column_names,
+ '{SKIP_ERRORS}' AS threshold_value,
+ {SKIP_ERRORS} AS skip_errors,
+ '{INPUT_PARAMETERS}' AS input_parameters,
+ fingerprint AS result_measure,
+ CASE
+ -- Training mode: tolerances not yet calculated
+ WHEN {LOWER_TOLERANCE} IS NULL AND {UPPER_TOLERANCE} IS NULL THEN -1
+ -- No change and excluded day: suppress
+ WHEN fingerprint = '{BASELINE_VALUE}' AND {IS_EXCLUDED_DAY} = 1 THEN 1
+ -- No change, beyond time range (business time): LATE
+ WHEN fingerprint = '{BASELINE_VALUE}'
+ AND (interval_minutes - {EXCLUDED_MINUTES}) > {THRESHOLD_VALUE} THEN 0
+ -- Table changed outside time range (business time): UNEXPECTED
+ WHEN fingerprint <> '{BASELINE_VALUE}'
+ AND NOT (interval_minutes - {EXCLUDED_MINUTES})
+ BETWEEN {LOWER_TOLERANCE} AND {UPPER_TOLERANCE} THEN 0
+ ELSE 1
+ END AS result_code,
+ 'Table update detected: ' || CASE WHEN fingerprint <> '{BASELINE_VALUE}' THEN 'Yes' ELSE 'No' END
+ || CASE
+ WHEN fingerprint <> '{BASELINE_VALUE}' AND (interval_minutes - {EXCLUDED_MINUTES}) BETWEEN {LOWER_TOLERANCE} AND {UPPER_TOLERANCE} THEN '. On time.'
+ WHEN fingerprint <> '{BASELINE_VALUE}' AND (interval_minutes - {EXCLUDED_MINUTES}) < {LOWER_TOLERANCE} THEN '. Earlier than expected.'
+ WHEN fingerprint <> '{BASELINE_VALUE}' AND (interval_minutes - {EXCLUDED_MINUTES}) > {UPPER_TOLERANCE} THEN '. Later than expected.'
+ WHEN fingerprint = '{BASELINE_VALUE}' AND {IS_EXCLUDED_DAY} = 0 AND (interval_minutes - {EXCLUDED_MINUTES}) > {THRESHOLD_VALUE} THEN '. Late.'
+ ELSE ''
+ END AS result_message,
+ CASE
+ WHEN fingerprint <> '{BASELINE_VALUE}' THEN '0'
+ ELSE COALESCE(TO_CHAR(interval_minutes), 'Unknown')
+ END AS result_signal
+      FROM test_data
+ - id: '2817'
+ test_type: Freshness_Trend
+ sql_flavor: sap_hana
+ template: |-
+ WITH test_data AS (
+ SELECT
+ LOWER(BINTOHEX(HASH_MD5(TO_BINARY({CUSTOM_QUERY})))) AS fingerprint,
+          ROUND(SECONDS_BETWEEN(TO_TIMESTAMP(NULLIF('{BASELINE_SUM}', ''), 'YYYY-MM-DD HH24:MI:SS'), TO_TIMESTAMP('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')) / 60.0) AS interval_minutes
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ )
+ SELECT '{TEST_TYPE}' AS test_type,
+ '{TEST_DEFINITION_ID}' AS test_definition_id,
+ '{TEST_SUITE_ID}' AS test_suite_id,
+ '{TEST_RUN_ID}' AS test_run_id,
+ '{RUN_DATE}' AS test_time,
+ '{SCHEMA_NAME}' AS schema_name,
+ '{TABLE_NAME}' AS table_name,
+ '{COLUMN_NAME_NO_QUOTES}' AS column_names,
+ '{SKIP_ERRORS}' AS threshold_value,
+ {SKIP_ERRORS} AS skip_errors,
+ '{INPUT_PARAMETERS}' AS input_parameters,
+ fingerprint AS result_measure,
+ CASE
+ -- Training mode: tolerances not yet calculated
+ WHEN {LOWER_TOLERANCE} IS NULL AND {UPPER_TOLERANCE} IS NULL THEN -1
+ -- No change and excluded day: suppress
+ WHEN fingerprint = '{BASELINE_VALUE}' AND {IS_EXCLUDED_DAY} = 1 THEN 1
+ -- No change, beyond time range (business time): LATE
+ WHEN fingerprint = '{BASELINE_VALUE}'
+ AND (interval_minutes - {EXCLUDED_MINUTES}) > {THRESHOLD_VALUE} THEN 0
+ -- Table changed outside time range (business time): UNEXPECTED
+ WHEN fingerprint <> '{BASELINE_VALUE}'
+ AND NOT (interval_minutes - {EXCLUDED_MINUTES})
+ BETWEEN {LOWER_TOLERANCE} AND {UPPER_TOLERANCE} THEN 0
+ ELSE 1
+ END AS result_code,
+ 'Table update detected: ' || CASE WHEN fingerprint <> '{BASELINE_VALUE}' THEN 'Yes' ELSE 'No' END
+ || CASE
+ WHEN fingerprint <> '{BASELINE_VALUE}' AND (interval_minutes - {EXCLUDED_MINUTES}) BETWEEN {LOWER_TOLERANCE} AND {UPPER_TOLERANCE} THEN '. On time.'
+ WHEN fingerprint <> '{BASELINE_VALUE}' AND (interval_minutes - {EXCLUDED_MINUTES}) < {LOWER_TOLERANCE} THEN '. Earlier than expected.'
+ WHEN fingerprint <> '{BASELINE_VALUE}' AND (interval_minutes - {EXCLUDED_MINUTES}) > {UPPER_TOLERANCE} THEN '. Later than expected.'
+ WHEN fingerprint = '{BASELINE_VALUE}' AND {IS_EXCLUDED_DAY} = 0 AND (interval_minutes - {EXCLUDED_MINUTES}) > {THRESHOLD_VALUE} THEN '. Late.'
+ ELSE ''
+ END AS result_message,
+ CASE
+ WHEN fingerprint <> '{BASELINE_VALUE}' THEN '0'
+ ELSE COALESCE(TO_VARCHAR(interval_minutes), 'Unknown')
+ END AS result_signal
+      FROM test_data
diff --git a/testgen/template/dbsetup_test_types/test_types_Future_Date.yaml b/testgen/template/dbsetup_test_types/test_types_Future_Date.yaml
index 646cc9c0..af804c97 100644
--- a/testgen/template/dbsetup_test_types/test_types_Future_Date.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Future_Date.yaml
@@ -55,7 +55,7 @@ test_types:
test_type: Future_Date
sql_flavor: mssql
measure: |-
- SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) >= CONVERT(DATE, '{RUN_DATE}') THEN 1 ELSE 0 END)
+ SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) > CONVERT(DATE, '{RUN_DATE}') THEN 1 ELSE 0 END)
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -95,7 +95,23 @@ test_types:
test_type: Future_Date
sql_flavor: trino
measure: |-
- SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) >= CAST('{RUN_DATE}' AS DATE) THEN 1 ELSE 0 END)
+ SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) > CAST('{RUN_DATE}' AS DATE) THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8010'
+ test_type: Future_Date
+ sql_flavor: oracle
+ measure: |-
+ SUM(CASE WHEN TRUNC({COLUMN_NAME}) > TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS') THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8010'
+ test_type: Future_Date
+ sql_flavor: sap_hana
+ measure: |-
+ SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) > TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS') THEN 1 ELSE 0 END)
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -126,7 +142,7 @@ test_types:
sql_flavor: mssql
lookup_type: null
lookup_query: |-
- SELECT DISTINCT TOP {LIMIT} "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CAST("{COLUMN_NAME}" AS DATE) >= CONVERT(DATE, '{TEST_DATE}') GROUP BY "{COLUMN_NAME}";
+ SELECT DISTINCT TOP {LIMIT} "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CAST("{COLUMN_NAME}" AS DATE) > CONVERT(DATE, '{TEST_DATE}') GROUP BY "{COLUMN_NAME}";
error_type: Test Results
- id: '1092'
test_id: '1015'
@@ -160,4 +176,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE GREATEST(0, SIGN("{COLUMN_NAME}"::DATE - '{TEST_DATE}'::DATE)) > {THRESHOLD_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Test Results
+ - id: '8010'
+ test_id: '1015'
+ test_type: Future_Date
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE TRUNC("{COLUMN_NAME}") > TO_DATE('{TEST_DATE}', 'YYYY-MM-DD HH24:MI:SS') GROUP BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8010'
+ test_id: '1015'
+ test_type: Future_Date
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CAST("{COLUMN_NAME}" AS DATE) > TO_DATE('{TEST_DATE}', 'YYYY-MM-DD HH24:MI:SS') GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Future_Date_1Y.yaml b/testgen/template/dbsetup_test_types/test_types_Future_Date_1Y.yaml
index 7f55192c..ae400acb 100644
--- a/testgen/template/dbsetup_test_types/test_types_Future_Date_1Y.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Future_Date_1Y.yaml
@@ -56,7 +56,7 @@ test_types:
test_type: Future_Date_1Y
sql_flavor: mssql
measure: |-
- SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) >= DATEADD(DAY, 365, CONVERT(DATE, '{RUN_DATE}')) THEN 1 ELSE 0 END)
+ SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) > DATEADD(DAY, 365, CONVERT(DATE, '{RUN_DATE}')) THEN 1 ELSE 0 END)
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -96,7 +96,23 @@ test_types:
test_type: Future_Date_1Y
sql_flavor: trino
measure: |-
- SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) >= (FROM_ISO8601_DATE('{RUN_DATE}') + interval '365' day ) THEN 1 ELSE 0 END)
+ SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) > (FROM_ISO8601_DATE('{RUN_DATE}') + interval '365' day ) THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8011'
+ test_type: Future_Date_1Y
+ sql_flavor: oracle
+ measure: |-
+ SUM(CASE WHEN TRUNC({COLUMN_NAME}) > TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS') + 365 THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8011'
+ test_type: Future_Date_1Y
+ sql_flavor: sap_hana
+ measure: |-
+ SUM(CASE WHEN CAST({COLUMN_NAME} AS DATE) > ADD_DAYS(TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS'), 365) THEN 1 ELSE 0 END)
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -127,7 +143,7 @@ test_types:
sql_flavor: mssql
lookup_type: null
lookup_query: |-
- SELECT DISTINCT TOP {LIMIT} "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CAST("{COLUMN_NAME}" AS DATE) >= DATEADD(DAY, 365, CONVERT(DATE, '{TEST_DATE}')) GROUP BY "{COLUMN_NAME}";
+ SELECT DISTINCT TOP {LIMIT} "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CAST("{COLUMN_NAME}" AS DATE) > DATEADD(DAY, 365, CONVERT(DATE, '{TEST_DATE}')) GROUP BY "{COLUMN_NAME}";
error_type: Test Results
- id: '1093'
test_id: '1016'
@@ -161,4 +177,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE GREATEST(0, SIGN("{COLUMN_NAME}"::DATE - ('{TEST_DATE}'::DATE + 365))) > {THRESHOLD_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Test Results
+  - id: '8011'
+ test_id: '1016'
+ test_type: Future_Date_1Y
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE TRUNC("{COLUMN_NAME}") > TO_DATE('{TEST_DATE}', 'YYYY-MM-DD HH24:MI:SS') + 365 GROUP BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+  - id: '8011'
+ test_id: '1016'
+ test_type: Future_Date_1Y
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CAST("{COLUMN_NAME}" AS DATE) > ADD_DAYS(TO_DATE('{TEST_DATE}', 'YYYY-MM-DD HH24:MI:SS'), 365) GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Incr_Avg_Shift.yaml b/testgen/template/dbsetup_test_types/test_types_Incr_Avg_Shift.yaml
index 94655ff8..707d20a6 100644
--- a/testgen/template/dbsetup_test_types/test_types_Incr_Avg_Shift.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Incr_Avg_Shift.yaml
@@ -101,6 +101,22 @@ test_types:
test_operator: '>='
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8012'
+ test_type: Incr_Avg_Shift
+ sql_flavor: oracle
+ measure: |-
+ NVL(ABS(({BASELINE_AVG} - (SUM({COLUMN_NAME}) - {BASELINE_SUM}) / NULLIF(COUNT({COLUMN_NAME}) - {BASELINE_VALUE_CT}, 0)) / {BASELINE_SD}), 0)
+ test_operator: '>='
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8012'
+ test_type: Incr_Avg_Shift
+ sql_flavor: sap_hana
+ measure: |-
+ COALESCE(ABS(({BASELINE_AVG} - (SUM({COLUMN_NAME}) - {BASELINE_SUM}) / NULLIF(COUNT({COLUMN_NAME}) - {BASELINE_VALUE_CT}, 0)) / {BASELINE_SD}), 0)
+ test_operator: '>='
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1375'
test_id: '1017'
@@ -161,4 +177,20 @@ test_types:
lookup_query: |-
SELECT AVG("{COLUMN_NAME}" :: FLOAT) AS current_average, SUM("{COLUMN_NAME}" ::FLOAT) AS current_sum, NULLIF(COUNT("{COLUMN_NAME}" )::FLOAT, 0) as current_value_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}";
error_type: Test Results
+ - id: '8012'
+ test_id: '1017'
+ test_type: Incr_Avg_Shift
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT AVG(CAST("{COLUMN_NAME}" AS NUMBER)) AS current_average, SUM(CAST("{COLUMN_NAME}" AS NUMBER)) AS current_sum, NULLIF(COUNT("{COLUMN_NAME}"), 0) as current_value_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ error_type: Test Results
+ - id: '8012'
+ test_id: '1017'
+ test_type: Incr_Avg_Shift
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT AVG(CAST("{COLUMN_NAME}" AS DECIMAL)) AS current_average, SUM(CAST("{COLUMN_NAME}" AS DECIMAL)) AS current_sum, NULLIF(COUNT("{COLUMN_NAME}"), 0) as current_value_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_LOV_All.yaml b/testgen/template/dbsetup_test_types/test_types_LOV_All.yaml
index 85665563..6c69fa22 100644
--- a/testgen/template/dbsetup_test_types/test_types_LOV_All.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_LOV_All.yaml
@@ -98,12 +98,29 @@ test_types:
test_operator: <>
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8013'
+ test_type: LOV_All
+ sql_flavor: oracle
+ measure: |-
+ LISTAGG(DISTINCT {COLUMN_NAME}, '|') WITHIN GROUP (ORDER BY {COLUMN_NAME})
+ test_operator: <>
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8013'
+ test_type: LOV_All
+ sql_flavor: sap_hana
+ measure: |-
+ LISTAGG(DISTINCT {COLUMN_NAME}, '|') WITHIN GROUP (ORDER BY {COLUMN_NAME})
+ test_operator: <>
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1376'
test_id: '1018'
test_type: LOV_All
sql_flavor: bigquery
lookup_type: null
+ lookup_redactable_columns: lov
lookup_query: |-
SELECT lov
FROM (
@@ -118,47 +135,71 @@ test_types:
test_type: LOV_All
sql_flavor: databricks
lookup_type: null
+ lookup_redactable_columns: lov
lookup_query: |-
- SELECT ARRAY_JOIN(ARRAY_SORT(COLLECT_SET(`{COLUMN_NAME}`)), '|') AS aggregated_values FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` HAVING ARRAY_JOIN(ARRAY_SORT(COLLECT_SET(`{COLUMN_NAME}`)), '|') <> '{THRESHOLD_VALUE}' LIMIT {LIMIT};
+ SELECT ARRAY_JOIN(ARRAY_SORT(COLLECT_SET(`{COLUMN_NAME}`)), '|') AS lov FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` HAVING ARRAY_JOIN(ARRAY_SORT(COLLECT_SET(`{COLUMN_NAME}`)), '|') <> '{THRESHOLD_VALUE}' LIMIT {LIMIT};
error_type: Test Results
- id: '1152'
test_id: '1018'
test_type: LOV_All
sql_flavor: mssql
lookup_type: null
+ lookup_redactable_columns: lov
lookup_query: |-
- WITH CTE AS (SELECT DISTINCT "{COLUMN_NAME}" FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") SELECT TOP {LIMIT} STRING_AGG( "{COLUMN_NAME}", '|' ) WITHIN GROUP (ORDER BY "{COLUMN_NAME}" ASC) FROM CTE HAVING STRING_AGG("{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}" ASC) <> '{THRESHOLD_VALUE}';
+ WITH CTE AS (SELECT DISTINCT "{COLUMN_NAME}" FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") SELECT TOP {LIMIT} STRING_AGG( "{COLUMN_NAME}", '|' ) WITHIN GROUP (ORDER BY "{COLUMN_NAME}" ASC) AS lov FROM CTE HAVING STRING_AGG("{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}" ASC) <> '{THRESHOLD_VALUE}';
error_type: Test Results
- id: '1095'
test_id: '1018'
test_type: LOV_All
sql_flavor: postgresql
lookup_type: null
+ lookup_redactable_columns: lov
lookup_query: |-
- SELECT STRING_AGG(DISTINCT "{COLUMN_NAME}", '|' ORDER BY "{COLUMN_NAME}" ASC) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" HAVING STRING_AGG(DISTINCT "{COLUMN_NAME}", '|' ORDER BY "{COLUMN_NAME}" ASC) <> '{THRESHOLD_VALUE}' LIMIT {LIMIT};
+ SELECT STRING_AGG(DISTINCT "{COLUMN_NAME}", '|' ORDER BY "{COLUMN_NAME}" ASC) AS lov FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" HAVING STRING_AGG(DISTINCT "{COLUMN_NAME}", '|' ORDER BY "{COLUMN_NAME}" ASC) <> '{THRESHOLD_VALUE}' LIMIT {LIMIT};
error_type: Test Results
- id: '1013'
test_id: '1018'
test_type: LOV_All
sql_flavor: redshift
lookup_type: null
+ lookup_redactable_columns: lov
lookup_query: |-
- SELECT LISTAGG(DISTINCT "{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" HAVING LISTAGG(DISTINCT "{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") <> '{THRESHOLD_VALUE}' LIMIT {LIMIT};
+ SELECT LISTAGG(DISTINCT "{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") AS lov FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" HAVING LISTAGG(DISTINCT "{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") <> '{THRESHOLD_VALUE}' LIMIT {LIMIT};
error_type: Test Results
- id: '1413'
test_id: '1018'
test_type: LOV_All
sql_flavor: redshift_spectrum
lookup_type: null
+ lookup_redactable_columns: lov
lookup_query: |-
- SELECT LISTAGG(DISTINCT "{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" HAVING LISTAGG(DISTINCT "{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") <> '{THRESHOLD_VALUE}' LIMIT {LIMIT};
+ SELECT LISTAGG(DISTINCT "{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") AS lov FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" HAVING LISTAGG(DISTINCT "{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") <> '{THRESHOLD_VALUE}' LIMIT {LIMIT};
error_type: Test Results
- id: '1209'
test_id: '1018'
test_type: LOV_All
sql_flavor: snowflake
lookup_type: null
+ lookup_redactable_columns: lov
+ lookup_query: |-
+ SELECT LISTAGG(DISTINCT "{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") AS lov FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" HAVING LISTAGG(DISTINCT "{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") <> '{THRESHOLD_VALUE}' LIMIT {LIMIT};
+ error_type: Test Results
+ - id: '8013'
+ test_id: '1018'
+ test_type: LOV_All
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_redactable_columns: lov
+ lookup_query: |-
+ SELECT LISTAGG("{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") AS lov FROM (SELECT DISTINCT "{COLUMN_NAME}" FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") HAVING LISTAGG("{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") <> '{THRESHOLD_VALUE}' FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8013'
+ test_id: '1018'
+ test_type: LOV_All
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_redactable_columns: lov
lookup_query: |-
- SELECT LISTAGG(DISTINCT "{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" HAVING LISTAGG(DISTINCT "{COLUMN_NAME}", '|') WITHIN GROUP (ORDER BY "{COLUMN_NAME}") <> '{THRESHOLD_VALUE}' LIMIT {LIMIT};
+      SELECT STRING_AGG("{COLUMN_NAME}", '|' ORDER BY "{COLUMN_NAME}") AS lov FROM (SELECT DISTINCT "{COLUMN_NAME}" FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") HAVING STRING_AGG("{COLUMN_NAME}", '|' ORDER BY "{COLUMN_NAME}") <> '{THRESHOLD_VALUE}' LIMIT {LIMIT}
error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_LOV_Match.yaml b/testgen/template/dbsetup_test_types/test_types_LOV_Match.yaml
index fed0b3ec..ef37b028 100644
--- a/testgen/template/dbsetup_test_types/test_types_LOV_Match.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_LOV_Match.yaml
@@ -204,6 +204,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8014'
+ test_type: LOV_Match
+ sql_flavor: oracle
+ measure: |-
+ SUM(CASE WHEN {COLUMN_NAME} IS NOT NULL AND {COLUMN_NAME} NOT IN {BASELINE_VALUE} THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8014'
+ test_type: LOV_Match
+ sql_flavor: sap_hana
+ measure: |-
+ SUM(CASE WHEN {COLUMN_NAME} IS NOT NULL AND {COLUMN_NAME} NOT IN {BASELINE_VALUE} THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1377'
test_id: '1019'
@@ -265,4 +281,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT NULLIF("{COLUMN_NAME}", '') AS "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE NULLIF("{COLUMN_NAME}", '') NOT IN {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Test Results
+ - id: '8014'
+ test_id: '1019'
+ test_type: LOV_Match
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NOT NULL AND "{COLUMN_NAME}" NOT IN {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8014'
+ test_id: '1019'
+ test_type: LOV_Match
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NOT NULL AND "{COLUMN_NAME}" NOT IN {BASELINE_VALUE} GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Metric_Trend.yaml b/testgen/template/dbsetup_test_types/test_types_Metric_Trend.yaml
index 545d25f6..524d5135 100644
--- a/testgen/template/dbsetup_test_types/test_types_Metric_Trend.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Metric_Trend.yaml
@@ -19,6 +19,7 @@ test_types:
default_parm_values: null
default_parm_prompts: Metric Name,Metric Expression,Lower Bound,Upper Bound,History Lookback
default_parm_help: null
+ default_parm_required: Y,Y,N,N,N
default_severity: Fail
run_type: CAT
test_scope: table
@@ -88,6 +89,22 @@ test_types:
test_operator: NOT BETWEEN
test_condition: |-
{LOWER_TOLERANCE} AND {UPPER_TOLERANCE}
+ - id: '8016'
+ test_type: Metric_Trend
+ sql_flavor: oracle
+ measure: |-
+ {CUSTOM_QUERY}
+ test_operator: NOT BETWEEN
+ test_condition: |-
+ {LOWER_TOLERANCE} AND {UPPER_TOLERANCE}
+ - id: '8016'
+ test_type: Metric_Trend
+ sql_flavor: sap_hana
+ measure: |-
+ {CUSTOM_QUERY}
+ test_operator: NOT BETWEEN
+ test_condition: |-
+ {LOWER_TOLERANCE} AND {UPPER_TOLERANCE}
target_data_lookups:
- id: '1484'
test_id: '1514'
@@ -166,4 +183,26 @@ test_types:
{UPPER_TOLERANCE} AS upper_bound
FROM "{TARGET_SCHEMA}"."{TABLE_NAME}";
error_type: Test Results
+ - id: '8514'
+ test_id: '1514'
+ test_type: Metric_Trend
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT {CUSTOM_QUERY} AS current_count,
+ {LOWER_TOLERANCE} AS lower_bound,
+ {UPPER_TOLERANCE} AS upper_bound
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ error_type: Test Results
+ - id: '8514'
+ test_id: '1514'
+ test_type: Metric_Trend
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT {CUSTOM_QUERY} AS current_count,
+ {LOWER_TOLERANCE} AS lower_bound,
+ {UPPER_TOLERANCE} AS upper_bound
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Min_Date.yaml b/testgen/template/dbsetup_test_types/test_types_Min_Date.yaml
index 01dbf230..2a64f34a 100644
--- a/testgen/template/dbsetup_test_types/test_types_Min_Date.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Min_Date.yaml
@@ -100,6 +100,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8015'
+ test_type: Min_Date
+ sql_flavor: oracle
+ measure: |-
+ SUM(CASE WHEN {COLUMN_NAME} < TO_DATE('{BASELINE_VALUE}', 'YYYY-MM-DD HH24:MI:SS') THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8015'
+ test_type: Min_Date
+ sql_flavor: sap_hana
+ measure: |-
+ SUM(CASE WHEN {COLUMN_NAME} < CAST('{BASELINE_VALUE}' AS {COLUMN_TYPE}) THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1378'
test_id: '1020'
@@ -161,4 +177,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" :: DATE < '{BASELINE_VALUE}' :: DATE GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Test Results
+ - id: '8015'
+ test_id: '1020'
+ test_type: Min_Date
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" < TO_DATE('{BASELINE_VALUE}', 'YYYY-MM-DD HH24:MI:SS') GROUP BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8015'
+ test_id: '1020'
+ test_type: Min_Date
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" < CAST('{BASELINE_VALUE}' AS {COLUMN_TYPE}) GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Min_Val.yaml b/testgen/template/dbsetup_test_types/test_types_Min_Val.yaml
index bfac4c70..56d505ff 100644
--- a/testgen/template/dbsetup_test_types/test_types_Min_Val.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Min_Val.yaml
@@ -100,6 +100,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8016'
+ test_type: Min_Val
+ sql_flavor: oracle
+ measure: |-
+ SUM(CASE WHEN {COLUMN_NAME} < {BASELINE_VALUE} - 1e-6 THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8016'
+ test_type: Min_Val
+ sql_flavor: sap_hana
+ measure: |-
+ SUM(CASE WHEN {COLUMN_NAME} < {BASELINE_VALUE} - 1e-6 THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1379'
test_id: '1021'
@@ -160,4 +176,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", (ABS("{COLUMN_NAME}") - ABS({BASELINE_VALUE})) AS difference_from_baseline FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" < {BASELINE_VALUE} LIMIT {LIMIT};
error_type: Test Results
+ - id: '8016'
+ test_id: '1021'
+ test_type: Min_Val
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", (ABS("{COLUMN_NAME}") - ABS({BASELINE_VALUE})) AS difference_from_baseline FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" < {BASELINE_VALUE} FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8016'
+ test_id: '1021'
+ test_type: Min_Val
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", (ABS("{COLUMN_NAME}") - ABS({BASELINE_VALUE})) AS difference_from_baseline FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" < {BASELINE_VALUE} LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Missing_Pct.yaml b/testgen/template/dbsetup_test_types/test_types_Missing_Pct.yaml
index 3bc7069a..6ddf86a0 100644
--- a/testgen/template/dbsetup_test_types/test_types_Missing_Pct.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Missing_Pct.yaml
@@ -101,6 +101,22 @@ test_types:
test_operator: '>='
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8017'
+ test_type: Missing_Pct
+ sql_flavor: oracle
+ measure: |-
+ ABS(2.0 * ASIN(SQRT(CAST({BASELINE_VALUE_CT} AS NUMBER) / CAST({BASELINE_CT} AS NUMBER))) - 2 * ASIN(SQRT(CAST(COUNT({COLUMN_NAME}) AS NUMBER) / CAST(NULLIF(COUNT(*), 0) AS NUMBER))))
+ test_operator: '>='
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8017'
+ test_type: Missing_Pct
+ sql_flavor: sap_hana
+ measure: |-
+ ABS(2.0 * ASIN(SQRT(CAST({BASELINE_VALUE_CT} AS DECIMAL) / CAST({BASELINE_CT} AS DECIMAL))) - 2 * ASIN(SQRT(CAST(COUNT({COLUMN_NAME}) AS DECIMAL) / CAST(NULLIF(COUNT(*), 0) AS DECIMAL))))
+ test_operator: '>='
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1380'
test_id: '1022'
@@ -161,4 +177,20 @@ test_types:
lookup_query: |-
SELECT * FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NULL OR "{COLUMN_NAME}" :: VARCHAR(255) = '' LIMIT {LIMIT};
error_type: Test Results
+ - id: '8017'
+ test_id: '1022'
+ test_type: Missing_Pct
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT * FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NULL FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8017'
+ test_id: '1022'
+ test_type: Missing_Pct
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT * FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NULL LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Monthly_Rec_Ct.yaml b/testgen/template/dbsetup_test_types/test_types_Monthly_Rec_Ct.yaml
index 4ce0fc6a..ec0fffa4 100644
--- a/testgen/template/dbsetup_test_types/test_types_Monthly_Rec_Ct.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Monthly_Rec_Ct.yaml
@@ -101,6 +101,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8018'
+ test_type: Monthly_Rec_Ct
+ sql_flavor: oracle
+ measure: |-
+ (MAX(<%DATEDIFF_MONTH;{COLUMN_NAME};TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>) - MIN(<%DATEDIFF_MONTH;{COLUMN_NAME};TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>) + 1) - COUNT(DISTINCT <%DATEDIFF_MONTH;{COLUMN_NAME};TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8018'
+ test_type: Monthly_Rec_Ct
+ sql_flavor: sap_hana
+ measure: |-
+ (MAX(<%DATEDIFF_MONTH;{COLUMN_NAME};TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>) - MIN(<%DATEDIFF_MONTH;{COLUMN_NAME};TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>) + 1) - COUNT(DISTINCT <%DATEDIFF_MONTH;{COLUMN_NAME};TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1381'
test_id: '1023'
@@ -185,15 +201,13 @@ test_types:
FROM check_periods c
LEFT JOIN data_by_period d
ON (c.check_period = d.data_period) )
- SELECT TOP {LIMIT} check_period, record_ct,
+ SELECT TOP {LIMIT} check_period AS missing_period, record_ct,
CASE
WHEN record_ct = 0 THEN 'MISSING'
ELSE 'Present'
END as status
FROM data_by_prd_with_prior_next
WHERE record_ct = 0
- OR last_record_ct = 0
- OR next_record_ct = 0
ORDER BY check_period DESC;
error_type: Test Results
- id: '1100'
@@ -228,4 +242,20 @@ test_types:
lookup_query: |-
WITH RECURSIVE daterange(all_dates) AS (SELECT DATE_TRUNC('month', MIN("{COLUMN_NAME}")) :: DATE AS all_dates FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" UNION ALL SELECT DATEADD(MONTH, 1, d.all_dates) :: DATE AS all_dates FROM daterange d WHERE d.all_dates < (SELECT DATE_TRUNC('month', MAX("{COLUMN_NAME}")) :: DATE FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") ), existing_periods AS (SELECT DISTINCT DATE_TRUNC('month',"{COLUMN_NAME}") :: DATE AS period, COUNT(1) AS period_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY DATE_TRUNC('month',"{COLUMN_NAME}") :: DATE ) SELECT p.missing_period, p.prior_available_month, e.period_count as prior_available_month_count, p.next_available_month, f.period_count as next_available_month_count FROM (SELECT d.all_dates as missing_period, MAX(b.period) AS prior_available_month, MIN(c.period) AS next_available_month FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates) p LEFT JOIN existing_periods e ON (p.prior_available_month = e.period) LEFT JOIN existing_periods f ON (p.next_available_month = f.period) ORDER BY p.missing_period LIMIT {LIMIT};
error_type: Test Results
+ - id: '8023'
+ test_id: '1023'
+ test_type: Monthly_Rec_Ct
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ WITH daterange AS (SELECT ADD_MONTHS((SELECT TRUNC(MIN("{COLUMN_NAME}"), 'MM') FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), LEVEL - 1) AS all_dates FROM DUAL CONNECT BY LEVEL <= MONTHS_BETWEEN((SELECT TRUNC(MAX("{COLUMN_NAME}"), 'MM') FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), (SELECT TRUNC(MIN("{COLUMN_NAME}"), 'MM') FROM "{TARGET_SCHEMA}"."{TABLE_NAME}")) + 1), existing_periods AS (SELECT DISTINCT TRUNC("{COLUMN_NAME}", 'MM') AS period, COUNT(1) AS period_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY TRUNC("{COLUMN_NAME}", 'MM')) SELECT p.missing_period, p.prior_available_month, e.period_count AS prior_available_month_count, p.next_available_month, f.period_count AS next_available_month_count FROM (SELECT d.all_dates AS missing_period, MAX(b.period) AS prior_available_month, MIN(c.period) AS next_available_month FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates) p LEFT JOIN existing_periods e ON (p.prior_available_month = e.period) LEFT JOIN existing_periods f ON (p.next_available_month = f.period) ORDER BY p.missing_period FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8023'
+ test_id: '1023'
+ test_type: Monthly_Rec_Ct
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ WITH Pass0 AS (SELECT 1 C FROM DUMMY UNION ALL SELECT 1 FROM DUMMY), Pass1 AS (SELECT 1 C FROM Pass0 A, Pass0 B), Pass2 AS (SELECT 1 C FROM Pass1 A, Pass1 B), Pass3 AS (SELECT 1 C FROM Pass2 A, Pass2 B), nums AS (SELECT ROW_NUMBER() OVER (ORDER BY C) - 1 AS rn FROM Pass3), bounds AS (SELECT TO_DATE(YEAR(MIN("{COLUMN_NAME}")) || '-' || LPAD(MONTH(MIN("{COLUMN_NAME}")), 2, '0') || '-01', 'YYYY-MM-DD') AS min_month, TO_DATE(YEAR(MAX("{COLUMN_NAME}")) || '-' || LPAD(MONTH(MAX("{COLUMN_NAME}")), 2, '0') || '-01', 'YYYY-MM-DD') AS max_month FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), daterange AS (SELECT ADD_MONTHS(b.min_month, n.rn) AS all_dates FROM bounds b, nums n WHERE ADD_MONTHS(b.min_month, n.rn) <= b.max_month), existing_periods AS (SELECT DISTINCT TO_DATE(YEAR("{COLUMN_NAME}") || '-' || LPAD(MONTH("{COLUMN_NAME}"), 2, '0') || '-01', 'YYYY-MM-DD') AS period, COUNT(1) AS period_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY YEAR("{COLUMN_NAME}"), MONTH("{COLUMN_NAME}")) SELECT p.missing_period, p.prior_available_month, e.period_count AS prior_available_month_count, p.next_available_month, f.period_count AS next_available_month_count FROM (SELECT d.all_dates AS missing_period, MAX(b.period) AS prior_available_month, MIN(c.period) AS next_available_month FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates) p LEFT JOIN existing_periods e ON (p.prior_available_month = e.period) LEFT JOIN existing_periods f ON (p.next_available_month = f.period) ORDER BY p.missing_period LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Outlier_Pct_Above.yaml b/testgen/template/dbsetup_test_types/test_types_Outlier_Pct_Above.yaml
index be6ad5eb..cb8ebf91 100644
--- a/testgen/template/dbsetup_test_types/test_types_Outlier_Pct_Above.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Outlier_Pct_Above.yaml
@@ -105,6 +105,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8019'
+ test_type: Outlier_Pct_Above
+ sql_flavor: oracle
+ measure: |-
+ CAST(SUM(CASE WHEN CAST({COLUMN_NAME} AS NUMBER) > {BASELINE_AVG}+(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END) AS NUMBER) / NULLIF(COUNT({COLUMN_NAME}), 0)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8019'
+ test_type: Outlier_Pct_Above
+ sql_flavor: sap_hana
+ measure: |-
+ CAST(SUM(CASE WHEN CAST({COLUMN_NAME} AS DECIMAL) > {BASELINE_AVG}+(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END) AS DECIMAL) / NULLIF(COUNT({COLUMN_NAME}), 0)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1382'
test_id: '1024'
@@ -166,4 +182,20 @@ test_types:
lookup_query: |-
SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" :: FLOAT > ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;
error_type: Test Results
+ - id: '8019'
+ test_id: '1024'
+ test_type: Outlier_Pct_Above
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CAST("{COLUMN_NAME}" AS NUMBER) > ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC
+ error_type: Test Results
+ - id: '8019'
+ test_id: '1024'
+ test_type: Outlier_Pct_Above
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CAST("{COLUMN_NAME}" AS DECIMAL) > ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Outlier_Pct_Below.yaml b/testgen/template/dbsetup_test_types/test_types_Outlier_Pct_Below.yaml
index 0fd3341a..b2b32d67 100644
--- a/testgen/template/dbsetup_test_types/test_types_Outlier_Pct_Below.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Outlier_Pct_Below.yaml
@@ -105,6 +105,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8020'
+ test_type: Outlier_Pct_Below
+ sql_flavor: oracle
+ measure: |-
+ CAST(SUM(CASE WHEN CAST({COLUMN_NAME} AS NUMBER) < {BASELINE_AVG}-(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END) AS NUMBER) / NULLIF(COUNT({COLUMN_NAME}), 0)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8020'
+ test_type: Outlier_Pct_Below
+ sql_flavor: sap_hana
+ measure: |-
+ CAST(SUM(CASE WHEN CAST({COLUMN_NAME} AS DECIMAL) < {BASELINE_AVG}-(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END) AS DECIMAL) / NULLIF(COUNT({COLUMN_NAME}), 0)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1383'
test_id: '1025'
@@ -112,9 +128,9 @@ test_types:
sql_flavor: bigquery
lookup_type: null
lookup_query: |-
- SELECT ({BASELINE_AVG} + (2 * {BASELINE_SD})) AS outlier_threshold, `{COLUMN_NAME}`, COUNT(*) AS count
+ SELECT ({BASELINE_AVG} - (2 * {BASELINE_SD})) AS outlier_threshold, `{COLUMN_NAME}`, COUNT(*) AS count
FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`
- WHERE CAST(`{COLUMN_NAME}` AS FLOAT64) < ({BASELINE_AVG} + (2 * {BASELINE_SD}))
+ WHERE CAST(`{COLUMN_NAME}` AS FLOAT64) < ({BASELINE_AVG} - (2 * {BASELINE_SD}))
GROUP BY `{COLUMN_NAME}`
ORDER BY `{COLUMN_NAME}` DESC;
error_type: Test Results
@@ -124,7 +140,7 @@ test_types:
sql_flavor: databricks
lookup_type: null
lookup_query: |-
- SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, `{COLUMN_NAME}`, COUNT(*) AS count FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` WHERE `{COLUMN_NAME}` :: FLOAT < ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` DESC;
+ SELECT ({BASELINE_AVG} - (2*{BASELINE_SD})) AS outlier_threshold, `{COLUMN_NAME}`, COUNT(*) AS count FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` WHERE `{COLUMN_NAME}` :: FLOAT < ({BASELINE_AVG} - (2*{BASELINE_SD})) GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` DESC;
error_type: Test Results
- id: '1159'
test_id: '1025'
@@ -132,7 +148,7 @@ test_types:
sql_flavor: mssql
lookup_type: null
lookup_query: |-
- SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CAST("{COLUMN_NAME}" AS FLOAT) < ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;
+ SELECT ({BASELINE_AVG} - (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CAST("{COLUMN_NAME}" AS FLOAT) < ({BASELINE_AVG} - (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;
error_type: Test Results
- id: '1102'
test_id: '1025'
@@ -140,7 +156,7 @@ test_types:
sql_flavor: postgresql
lookup_type: null
lookup_query: |-
- SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" :: FLOAT < ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;
+ SELECT ({BASELINE_AVG} - (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" :: FLOAT < ({BASELINE_AVG} - (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;
error_type: Test Results
- id: '1020'
test_id: '1025'
@@ -148,7 +164,7 @@ test_types:
sql_flavor: redshift
lookup_type: null
lookup_query: |-
- SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" :: FLOAT < ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;
+ SELECT ({BASELINE_AVG} - (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" :: FLOAT < ({BASELINE_AVG} - (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;
error_type: Test Results
- id: '1420'
test_id: '1025'
@@ -156,7 +172,7 @@ test_types:
sql_flavor: redshift_spectrum
lookup_type: null
lookup_query: |-
- SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" :: FLOAT < ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;
+ SELECT ({BASELINE_AVG} - (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" :: FLOAT < ({BASELINE_AVG} - (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;
error_type: Test Results
- id: '1216'
test_id: '1025'
@@ -164,6 +180,22 @@ test_types:
sql_flavor: snowflake
lookup_type: null
lookup_query: |-
- SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" :: FLOAT < ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;
+ SELECT ({BASELINE_AVG} - (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" :: FLOAT < ({BASELINE_AVG} - (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;
+ error_type: Test Results
+ - id: '8020'
+ test_id: '1025'
+ test_type: Outlier_Pct_Below
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT ({BASELINE_AVG} - (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CAST("{COLUMN_NAME}" AS NUMBER) < ({BASELINE_AVG} - (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC
+ error_type: Test Results
+ - id: '8020'
+ test_id: '1025'
+ test_type: Outlier_Pct_Below
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT ({BASELINE_AVG} - (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE CAST("{COLUMN_NAME}" AS DECIMAL) < ({BASELINE_AVG} - (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC
error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Pattern_Match.yaml b/testgen/template/dbsetup_test_types/test_types_Pattern_Match.yaml
index 6fd1f981..3cd3359d 100644
--- a/testgen/template/dbsetup_test_types/test_types_Pattern_Match.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Pattern_Match.yaml
@@ -100,6 +100,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8021'
+ test_type: Pattern_Match
+ sql_flavor: oracle
+ measure: |-
+ COUNT(NULLIF({COLUMN_NAME}, '')) - SUM(CASE WHEN REGEXP_LIKE(NULLIF(TO_CHAR({COLUMN_NAME}), ''), '{BASELINE_VALUE}') THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8021'
+ test_type: Pattern_Match
+ sql_flavor: sap_hana
+ measure: |-
+ COUNT(NULLIF({COLUMN_NAME}, '')) - SUM(CASE WHEN NULLIF(TO_VARCHAR({COLUMN_NAME}), '') LIKE_REGEXPR '{BASELINE_VALUE}' THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1384'
test_id: '1026'
@@ -161,4 +177,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE REGEXP_LIKE(NULLIF("{COLUMN_NAME}"::VARCHAR, ''),'{BASELINE_VALUE}') != 1 GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Test Results
+ - id: '8021'
+ test_id: '1026'
+ test_type: Pattern_Match
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE NOT REGEXP_LIKE(NULLIF(TO_CHAR("{COLUMN_NAME}"), ''), '{BASELINE_VALUE}') GROUP BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8021'
+ test_id: '1026'
+ test_type: Pattern_Match
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE NOT NULLIF(TO_VARCHAR("{COLUMN_NAME}"), '') LIKE_REGEXPR '{BASELINE_VALUE}' GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Recency.yaml b/testgen/template/dbsetup_test_types/test_types_Recency.yaml
index c69df2e2..9607a3ac 100644
--- a/testgen/template/dbsetup_test_types/test_types_Recency.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Recency.yaml
@@ -101,12 +101,29 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8022'
+ test_type: Recency
+ sql_flavor: oracle
+ measure: |-
+ <%DATEDIFF_DAY;MAX({COLUMN_NAME});TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8022'
+ test_type: Recency
+ sql_flavor: sap_hana
+ measure: |-
+ <%DATEDIFF_DAY;MAX({COLUMN_NAME});TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1385'
test_id: '1028'
test_type: Recency
sql_flavor: bigquery
lookup_type: null
+ lookup_redactable_columns: latest_date_available
lookup_query: |-
SELECT DISTINCT col AS latest_date_available, CAST(CAST('{TEST_DATE}' AS DATETIME) AS {COLUMN_TYPE}) AS test_run_date
FROM (SELECT DATE_TRUNC(MAX(`{COLUMN_NAME}`), DAY) AS col FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`)
@@ -118,6 +135,7 @@ test_types:
test_type: Recency
sql_flavor: databricks
lookup_type: null
+ lookup_redactable_columns: latest_date_available
lookup_query: |-
SELECT DISTINCT col AS latest_date_available, '{TEST_DATE}' :: DATE as test_run_date FROM (SELECT MAX(`{COLUMN_NAME}`) AS col FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`) WHERE ABS(<%DATEDIFF_DAY;col;'{TEST_DATE}'::DATE%>) > {THRESHOLD_VALUE} LIMIT {LIMIT};
error_type: Test Results
@@ -126,6 +144,7 @@ test_types:
test_type: Recency
sql_flavor: mssql
lookup_type: null
+ lookup_redactable_columns: latest_date_available
lookup_query: |-
SELECT DISTINCT TOP {LIMIT} col AS latest_date_available, CAST('{TEST_DATE}' AS DATE) AS test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") a WHERE DATEDIFF(day, col, CAST('{TEST_DATE}' AS DATE)) > {THRESHOLD_VALUE};
error_type: Test Results
@@ -134,6 +153,7 @@ test_types:
test_type: Recency
sql_flavor: postgresql
lookup_type: null
+ lookup_redactable_columns: latest_date_available
lookup_query: |-
SELECT DISTINCT col AS latest_date_available, '{TEST_DATE}' :: DATE as test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") a WHERE <%DATEDIFF_DAY;col;'{TEST_DATE}'::DATE%> > {THRESHOLD_VALUE} LIMIT {LIMIT};
error_type: Test Results
@@ -142,6 +162,7 @@ test_types:
test_type: Recency
sql_flavor: redshift
lookup_type: null
+ lookup_redactable_columns: latest_date_available
lookup_query: |-
SELECT DISTINCT col AS latest_date_available, '{TEST_DATE}' :: DATE as test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") WHERE DATEDIFF('D', col, '{TEST_DATE}'::DATE) > {THRESHOLD_VALUE} LIMIT {LIMIT};
error_type: Test Results
@@ -150,6 +171,7 @@ test_types:
test_type: Recency
sql_flavor: redshift_spectrum
lookup_type: null
+ lookup_redactable_columns: latest_date_available
lookup_query: |-
SELECT DISTINCT col AS latest_date_available, '{TEST_DATE}' :: DATE as test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") WHERE DATEDIFF('D', col, '{TEST_DATE}'::DATE) > {THRESHOLD_VALUE} LIMIT {LIMIT};
error_type: Test Results
@@ -158,7 +180,26 @@ test_types:
test_type: Recency
sql_flavor: snowflake
lookup_type: null
+ lookup_redactable_columns: latest_date_available
lookup_query: |-
SELECT DISTINCT col AS latest_date_available, '{TEST_DATE}' :: DATE as test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") WHERE DATEDIFF('D', col, '{TEST_DATE}'::DATE) > {THRESHOLD_VALUE} LIMIT {LIMIT};
error_type: Test Results
+ - id: '8022'
+ test_id: '1028'
+ test_type: Recency
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_redactable_columns: latest_date_available
+ lookup_query: |-
+ SELECT DISTINCT col AS latest_date_available, TO_DATE('{TEST_DATE}', 'YYYY-MM-DD HH24:MI:SS') AS test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") WHERE <%DATEDIFF_DAY;col;TO_DATE('{TEST_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> > {THRESHOLD_VALUE} FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8022'
+ test_id: '1028'
+ test_type: Recency
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_redactable_columns: latest_date_available
+ lookup_query: |-
+ SELECT DISTINCT col AS latest_date_available, TO_DATE('{TEST_DATE}', 'YYYY-MM-DD HH24:MI:SS') AS test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") WHERE <%DATEDIFF_DAY;col;TO_DATE('{TEST_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> > {THRESHOLD_VALUE} LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Required.yaml b/testgen/template/dbsetup_test_types/test_types_Required.yaml
index fcb3200b..f11ceb36 100644
--- a/testgen/template/dbsetup_test_types/test_types_Required.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Required.yaml
@@ -99,6 +99,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8023'
+ test_type: Required
+ sql_flavor: oracle
+ measure: |-
+ COUNT(*) - COUNT({COLUMN_NAME})
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8023'
+ test_type: Required
+ sql_flavor: sap_hana
+ measure: |-
+ COUNT(*) - COUNT({COLUMN_NAME})
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1386'
test_id: '1030'
@@ -159,4 +175,20 @@ test_types:
lookup_query: |-
SELECT * FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NULL LIMIT {LIMIT};
error_type: Test Results
+ - id: '8023'
+ test_id: '1030'
+ test_type: Required
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT * FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NULL FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8023'
+ test_id: '1030'
+ test_type: Required
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT * FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NULL LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Row_Ct.yaml b/testgen/template/dbsetup_test_types/test_types_Row_Ct.yaml
index 47c71112..4a373834 100644
--- a/testgen/template/dbsetup_test_types/test_types_Row_Ct.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Row_Ct.yaml
@@ -98,6 +98,22 @@ test_types:
test_operator: <
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8024'
+ test_type: Row_Ct
+ sql_flavor: oracle
+ measure: |-
+ COUNT(*)
+ test_operator: <
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8024'
+ test_type: Row_Ct
+ sql_flavor: sap_hana
+ measure: |-
+ COUNT(*)
+ test_operator: <
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1387'
test_id: '1031'
@@ -162,4 +178,20 @@ test_types:
lookup_query: |-
WITH CTE AS (SELECT COUNT(*) AS current_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") SELECT current_count, ABS(ROUND(100 *(current_count - {THRESHOLD_VALUE}) :: FLOAT / {THRESHOLD_VALUE} :: FLOAT,2)) AS row_count_pct_decrease FROM cte WHERE current_count < {THRESHOLD_VALUE};
error_type: Test Results
+ - id: '8024'
+ test_id: '1031'
+ test_type: Row_Ct
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ WITH CTE AS (SELECT COUNT(*) AS current_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") SELECT current_count, ABS(ROUND(100 * (current_count - {THRESHOLD_VALUE}) / {THRESHOLD_VALUE}, 2)) AS row_count_pct_decrease FROM cte WHERE current_count < {THRESHOLD_VALUE}
+ error_type: Test Results
+ - id: '8024'
+ test_id: '1031'
+ test_type: Row_Ct
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ WITH CTE AS (SELECT COUNT(*) AS current_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") SELECT current_count, ABS(ROUND(100 * (current_count - {THRESHOLD_VALUE}) / {THRESHOLD_VALUE}, 2)) AS row_count_pct_decrease FROM cte WHERE current_count < {THRESHOLD_VALUE}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Row_Ct_Pct.yaml b/testgen/template/dbsetup_test_types/test_types_Row_Ct_Pct.yaml
index 08209512..6b176c7a 100644
--- a/testgen/template/dbsetup_test_types/test_types_Row_Ct_Pct.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Row_Ct_Pct.yaml
@@ -99,6 +99,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8025'
+ test_type: Row_Ct_Pct
+ sql_flavor: oracle
+ measure: |-
+ ABS(ROUND(100.0 * (COUNT(*) - {BASELINE_CT}) / {BASELINE_CT}, 2))
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8025'
+ test_type: Row_Ct_Pct
+ sql_flavor: sap_hana
+ measure: |-
+ ABS(ROUND(100.0 * (COUNT(*) - {BASELINE_CT}) / {BASELINE_CT}, 2))
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1388'
test_id: '1032'
@@ -162,4 +178,20 @@ test_types:
lookup_query: |-
WITH CTE AS (SELECT COUNT(*) AS current_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") SELECT current_count, {BASELINE_CT} AS baseline_count, ABS(ROUND(100 * (current_count - {BASELINE_CT}) :: FLOAT / {BASELINE_CT} :: FLOAT,2)) AS row_count_pct_difference FROM cte;
error_type: Test Results
+ - id: '8025'
+ test_id: '1032'
+ test_type: Row_Ct_Pct
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ WITH CTE AS (SELECT COUNT(*) AS current_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") SELECT current_count, {BASELINE_CT} AS baseline_count, ABS(ROUND(100 * (current_count - {BASELINE_CT}) / {BASELINE_CT}, 2)) AS row_count_pct_difference FROM cte
+ error_type: Test Results
+ - id: '8025'
+ test_id: '1032'
+ test_type: Row_Ct_Pct
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ WITH CTE AS (SELECT COUNT(*) AS current_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") SELECT current_count, {BASELINE_CT} AS baseline_count, ABS(ROUND(100 * (current_count - {BASELINE_CT}) / {BASELINE_CT}, 2)) AS row_count_pct_difference FROM cte
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Schema_Drift.yaml b/testgen/template/dbsetup_test_types/test_types_Schema_Drift.yaml
index d1ea92cf..e1e23dcd 100644
--- a/testgen/template/dbsetup_test_types/test_types_Schema_Drift.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Schema_Drift.yaml
@@ -46,7 +46,7 @@ test_types:
AND id <> '{TEST_RUN_ID}'::UUID
),
table_changes AS (
- SELECT
+ SELECT
dsl.table_name,
MAX(prev_test.last_run_time) as window_start,
MAX(CASE WHEN dsl.column_id IS NULL AND dsl.change = 'A' THEN dsl.change_date ELSE NULL END) as last_add_date,
@@ -61,7 +61,7 @@ test_types:
AND dsl.change_date > prev_test.last_run_time
GROUP BY dsl.table_name
)
- SELECT
+ SELECT
'{TEST_TYPE}' AS test_type,
'{TEST_DEFINITION_ID}' AS test_definition_id,
'{TEST_SUITE_ID}' AS test_suite_id,
@@ -70,14 +70,14 @@ test_types:
'{SCHEMA_NAME}' AS schema_name,
table_name,
'{INPUT_PARAMETERS}' AS input_parameters,
- (CASE
+ (CASE
WHEN last_add_date IS NOT NULL AND (last_drop_date IS NULL OR last_add_date > last_drop_date) THEN 'A'
WHEN last_drop_date IS NOT NULL AND (last_add_date IS NULL OR last_drop_date > last_add_date) THEN 'D'
- ELSE 'M'
+ ELSE 'M'
END)
- || '|' || column_adds
- || '|' || column_drops
- || '|' || column_mods
+ || '|' || column_adds
+ || '|' || column_drops
+ || '|' || column_mods
|| '|' || window_start::TEXT
AS result_signal,
0 AS result_code,
@@ -101,7 +101,7 @@ test_types:
AND id <> '{TEST_RUN_ID}'::UUID
),
table_changes AS (
- SELECT
+ SELECT
dsl.table_name,
MAX(prev_test.last_run_time) as window_start,
MAX(CASE WHEN dsl.column_id IS NULL AND dsl.change = 'A' THEN dsl.change_date ELSE NULL END) as last_add_date,
@@ -116,7 +116,7 @@ test_types:
AND dsl.change_date > prev_test.last_run_time
GROUP BY dsl.table_name
)
- SELECT
+ SELECT
'{TEST_TYPE}' AS test_type,
'{TEST_DEFINITION_ID}' AS test_definition_id,
'{TEST_SUITE_ID}' AS test_suite_id,
@@ -125,14 +125,14 @@ test_types:
'{SCHEMA_NAME}' AS schema_name,
table_name,
'{INPUT_PARAMETERS}' AS input_parameters,
- (CASE
+ (CASE
WHEN last_add_date IS NOT NULL AND (last_drop_date IS NULL OR last_add_date > last_drop_date) THEN 'A'
WHEN last_drop_date IS NOT NULL AND (last_add_date IS NULL OR last_drop_date > last_add_date) THEN 'D'
- ELSE 'M'
+ ELSE 'M'
END)
- || '|' || column_adds
- || '|' || column_drops
- || '|' || column_mods
+ || '|' || column_adds
+ || '|' || column_drops
+ || '|' || column_mods
|| '|' || window_start::TEXT
AS result_signal,
0 AS result_code,
@@ -156,7 +156,7 @@ test_types:
AND id <> '{TEST_RUN_ID}'::UUID
),
table_changes AS (
- SELECT
+ SELECT
dsl.table_name,
MAX(prev_test.last_run_time) as window_start,
MAX(CASE WHEN dsl.column_id IS NULL AND dsl.change = 'A' THEN dsl.change_date ELSE NULL END) as last_add_date,
@@ -171,7 +171,7 @@ test_types:
AND dsl.change_date > prev_test.last_run_time
GROUP BY dsl.table_name
)
- SELECT
+ SELECT
'{TEST_TYPE}' AS test_type,
'{TEST_DEFINITION_ID}' AS test_definition_id,
'{TEST_SUITE_ID}' AS test_suite_id,
@@ -180,14 +180,14 @@ test_types:
'{SCHEMA_NAME}' AS schema_name,
table_name,
'{INPUT_PARAMETERS}' AS input_parameters,
- (CASE
+ (CASE
WHEN last_add_date IS NOT NULL AND (last_drop_date IS NULL OR last_add_date > last_drop_date) THEN 'A'
WHEN last_drop_date IS NOT NULL AND (last_add_date IS NULL OR last_drop_date > last_add_date) THEN 'D'
- ELSE 'M'
+ ELSE 'M'
END)
- || '|' || column_adds
- || '|' || column_drops
- || '|' || column_mods
+ || '|' || column_adds
+ || '|' || column_drops
+ || '|' || column_mods
|| '|' || window_start::TEXT
AS result_signal,
0 AS result_code,
@@ -211,7 +211,7 @@ test_types:
AND id <> '{TEST_RUN_ID}'::UUID
),
table_changes AS (
- SELECT
+ SELECT
dsl.table_name,
MAX(prev_test.last_run_time) as window_start,
MAX(CASE WHEN dsl.column_id IS NULL AND dsl.change = 'A' THEN dsl.change_date ELSE NULL END) as last_add_date,
@@ -226,7 +226,7 @@ test_types:
AND dsl.change_date > prev_test.last_run_time
GROUP BY dsl.table_name
)
- SELECT
+ SELECT
'{TEST_TYPE}' AS test_type,
'{TEST_DEFINITION_ID}' AS test_definition_id,
'{TEST_SUITE_ID}' AS test_suite_id,
@@ -235,14 +235,14 @@ test_types:
'{SCHEMA_NAME}' AS schema_name,
table_name,
'{INPUT_PARAMETERS}' AS input_parameters,
- (CASE
+ (CASE
WHEN last_add_date IS NOT NULL AND (last_drop_date IS NULL OR last_add_date > last_drop_date) THEN 'A'
WHEN last_drop_date IS NOT NULL AND (last_add_date IS NULL OR last_drop_date > last_add_date) THEN 'D'
- ELSE 'M'
+ ELSE 'M'
END)
- || '|' || column_adds
- || '|' || column_drops
- || '|' || column_mods
+ || '|' || column_adds
+ || '|' || column_drops
+ || '|' || column_mods
|| '|' || window_start::TEXT
AS result_signal,
0 AS result_code,
@@ -266,7 +266,7 @@ test_types:
AND id <> '{TEST_RUN_ID}'::UUID
),
table_changes AS (
- SELECT
+ SELECT
dsl.table_name,
MAX(prev_test.last_run_time) as window_start,
MAX(CASE WHEN dsl.column_id IS NULL AND dsl.change = 'A' THEN dsl.change_date ELSE NULL END) as last_add_date,
@@ -281,7 +281,7 @@ test_types:
AND dsl.change_date > prev_test.last_run_time
GROUP BY dsl.table_name
)
- SELECT
+ SELECT
'{TEST_TYPE}' AS test_type,
'{TEST_DEFINITION_ID}' AS test_definition_id,
'{TEST_SUITE_ID}' AS test_suite_id,
@@ -290,14 +290,14 @@ test_types:
'{SCHEMA_NAME}' AS schema_name,
table_name,
'{INPUT_PARAMETERS}' AS input_parameters,
- (CASE
+ (CASE
WHEN last_add_date IS NOT NULL AND (last_drop_date IS NULL OR last_add_date > last_drop_date) THEN 'A'
WHEN last_drop_date IS NOT NULL AND (last_add_date IS NULL OR last_drop_date > last_add_date) THEN 'D'
- ELSE 'M'
+ ELSE 'M'
END)
- || '|' || column_adds
- || '|' || column_drops
- || '|' || column_mods
+ || '|' || column_adds
+ || '|' || column_drops
+ || '|' || column_mods
|| '|' || window_start::TEXT
AS result_signal,
0 AS result_code,
@@ -321,7 +321,7 @@ test_types:
AND id <> '{TEST_RUN_ID}'::UUID
),
table_changes AS (
- SELECT
+ SELECT
dsl.table_name,
MAX(prev_test.last_run_time) as window_start,
MAX(CASE WHEN dsl.column_id IS NULL AND dsl.change = 'A' THEN dsl.change_date ELSE NULL END) as last_add_date,
@@ -336,7 +336,7 @@ test_types:
AND dsl.change_date > prev_test.last_run_time
GROUP BY dsl.table_name
)
- SELECT
+ SELECT
'{TEST_TYPE}' AS test_type,
'{TEST_DEFINITION_ID}' AS test_definition_id,
'{TEST_SUITE_ID}' AS test_suite_id,
@@ -345,14 +345,14 @@ test_types:
'{SCHEMA_NAME}' AS schema_name,
table_name,
'{INPUT_PARAMETERS}' AS input_parameters,
- (CASE
+ (CASE
WHEN last_add_date IS NOT NULL AND (last_drop_date IS NULL OR last_add_date > last_drop_date) THEN 'A'
WHEN last_drop_date IS NOT NULL AND (last_add_date IS NULL OR last_drop_date > last_add_date) THEN 'D'
- ELSE 'M'
+ ELSE 'M'
END)
- || '|' || column_adds
- || '|' || column_drops
- || '|' || column_mods
+ || '|' || column_adds
+ || '|' || column_drops
+ || '|' || column_mods
|| '|' || window_start::TEXT
AS result_signal,
0 AS result_code,
@@ -376,7 +376,7 @@ test_types:
AND id <> '{TEST_RUN_ID}'::UUID
),
table_changes AS (
- SELECT
+ SELECT
dsl.table_name,
MAX(prev_test.last_run_time) as window_start,
MAX(CASE WHEN dsl.column_id IS NULL AND dsl.change = 'A' THEN dsl.change_date ELSE NULL END) as last_add_date,
@@ -391,7 +391,7 @@ test_types:
AND dsl.change_date > prev_test.last_run_time
GROUP BY dsl.table_name
)
- SELECT
+ SELECT
'{TEST_TYPE}' AS test_type,
'{TEST_DEFINITION_ID}' AS test_definition_id,
'{TEST_SUITE_ID}' AS test_suite_id,
@@ -400,14 +400,124 @@ test_types:
'{SCHEMA_NAME}' AS schema_name,
table_name,
'{INPUT_PARAMETERS}' AS input_parameters,
- (CASE
+ (CASE
WHEN last_add_date IS NOT NULL AND (last_drop_date IS NULL OR last_add_date > last_drop_date) THEN 'A'
WHEN last_drop_date IS NOT NULL AND (last_add_date IS NULL OR last_drop_date > last_add_date) THEN 'D'
- ELSE 'M'
+ ELSE 'M'
END)
- || '|' || column_adds
- || '|' || column_drops
- || '|' || column_mods
+ || '|' || column_adds
+ || '|' || column_drops
+ || '|' || column_mods
+ || '|' || window_start::TEXT
+ AS result_signal,
+ 0 AS result_code,
+ CASE WHEN last_add_date IS NOT NULL AND (last_drop_date IS NULL OR last_add_date > last_drop_date) THEN 'Table added. ' ELSE '' END
+ || CASE WHEN last_drop_date IS NOT NULL AND (last_add_date IS NULL OR last_drop_date > last_add_date) THEN 'Table dropped. ' ELSE '' END
+ || CASE WHEN column_adds > 0 THEN column_adds || ' columns added. ' ELSE '' END
+ || CASE WHEN column_drops > 0 THEN column_drops || ' columns dropped. ' ELSE '' END
+ || CASE WHEN column_mods > 0 THEN column_mods || ' columns modified. ' ELSE '' END
+ AS result_message,
+ column_adds + column_drops + column_mods AS result_measure
+ FROM table_changes;
+ - id: '8014'
+ test_type: Schema_Drift
+ sql_flavor: oracle
+ template: |-
+ WITH prev_test AS (
+ SELECT MAX(test_starttime) AS last_run_time
+ FROM {APP_SCHEMA_NAME}.test_runs
+ WHERE test_suite_id = '{TEST_SUITE_ID}'::UUID
+ -- Ignore current run
+ AND id <> '{TEST_RUN_ID}'::UUID
+ ),
+ table_changes AS (
+ SELECT
+ dsl.table_name,
+ MAX(prev_test.last_run_time) as window_start,
+ MAX(CASE WHEN dsl.column_id IS NULL AND dsl.change = 'A' THEN dsl.change_date ELSE NULL END) as last_add_date,
+ MAX(CASE WHEN dsl.column_id IS NULL AND dsl.change = 'D' THEN dsl.change_date ELSE NULL END) as last_drop_date,
+ COUNT(*) FILTER (WHERE dsl.column_id IS NOT NULL AND dsl.change = 'A') AS column_adds,
+ COUNT(*) FILTER (WHERE dsl.column_id IS NOT NULL AND dsl.change = 'D') AS column_drops,
+ COUNT(*) FILTER (WHERE dsl.column_id IS NOT NULL AND dsl.change = 'M') AS column_mods
+ FROM {APP_SCHEMA_NAME}.data_structure_log dsl
+ CROSS JOIN prev_test
+ WHERE dsl.table_groups_id = '{TABLE_GROUPS_ID}'::UUID
+ -- if no previous tests, this comparison yields null and nothing is counted
+ AND dsl.change_date > prev_test.last_run_time
+ GROUP BY dsl.table_name
+ )
+ SELECT
+ '{TEST_TYPE}' AS test_type,
+ '{TEST_DEFINITION_ID}' AS test_definition_id,
+ '{TEST_SUITE_ID}' AS test_suite_id,
+ '{TEST_RUN_ID}' AS test_run_id,
+ '{RUN_DATE}' AS test_time,
+ '{SCHEMA_NAME}' AS schema_name,
+ table_name,
+ '{INPUT_PARAMETERS}' AS input_parameters,
+ (CASE
+ WHEN last_add_date IS NOT NULL AND (last_drop_date IS NULL OR last_add_date > last_drop_date) THEN 'A'
+ WHEN last_drop_date IS NOT NULL AND (last_add_date IS NULL OR last_drop_date > last_add_date) THEN 'D'
+ ELSE 'M'
+ END)
+ || '|' || column_adds
+ || '|' || column_drops
+ || '|' || column_mods
+ || '|' || window_start::TEXT
+ AS result_signal,
+ 0 AS result_code,
+ CASE WHEN last_add_date IS NOT NULL AND (last_drop_date IS NULL OR last_add_date > last_drop_date) THEN 'Table added. ' ELSE '' END
+ || CASE WHEN last_drop_date IS NOT NULL AND (last_add_date IS NULL OR last_drop_date > last_add_date) THEN 'Table dropped. ' ELSE '' END
+ || CASE WHEN column_adds > 0 THEN column_adds || ' columns added. ' ELSE '' END
+ || CASE WHEN column_drops > 0 THEN column_drops || ' columns dropped. ' ELSE '' END
+ || CASE WHEN column_mods > 0 THEN column_mods || ' columns modified. ' ELSE '' END
+ AS result_message,
+ column_adds + column_drops + column_mods AS result_measure
+ FROM table_changes;
+ - id: '8014'
+ test_type: Schema_Drift
+ sql_flavor: sap_hana
+ template: |-
+ WITH prev_test AS (
+ SELECT MAX(test_starttime) AS last_run_time
+ FROM {APP_SCHEMA_NAME}.test_runs
+ WHERE test_suite_id = '{TEST_SUITE_ID}'::UUID
+ -- Ignore current run
+ AND id <> '{TEST_RUN_ID}'::UUID
+ ),
+ table_changes AS (
+ SELECT
+ dsl.table_name,
+ MAX(prev_test.last_run_time) as window_start,
+ MAX(CASE WHEN dsl.column_id IS NULL AND dsl.change = 'A' THEN dsl.change_date ELSE NULL END) as last_add_date,
+ MAX(CASE WHEN dsl.column_id IS NULL AND dsl.change = 'D' THEN dsl.change_date ELSE NULL END) as last_drop_date,
+ COUNT(*) FILTER (WHERE dsl.column_id IS NOT NULL AND dsl.change = 'A') AS column_adds,
+ COUNT(*) FILTER (WHERE dsl.column_id IS NOT NULL AND dsl.change = 'D') AS column_drops,
+ COUNT(*) FILTER (WHERE dsl.column_id IS NOT NULL AND dsl.change = 'M') AS column_mods
+ FROM {APP_SCHEMA_NAME}.data_structure_log dsl
+ CROSS JOIN prev_test
+ WHERE dsl.table_groups_id = '{TABLE_GROUPS_ID}'::UUID
+ -- if no previous tests, this comparison yields null and nothing is counted
+ AND dsl.change_date > prev_test.last_run_time
+ GROUP BY dsl.table_name
+ )
+ SELECT
+ '{TEST_TYPE}' AS test_type,
+ '{TEST_DEFINITION_ID}' AS test_definition_id,
+ '{TEST_SUITE_ID}' AS test_suite_id,
+ '{TEST_RUN_ID}' AS test_run_id,
+ '{RUN_DATE}' AS test_time,
+ '{SCHEMA_NAME}' AS schema_name,
+ table_name,
+ '{INPUT_PARAMETERS}' AS input_parameters,
+ (CASE
+ WHEN last_add_date IS NOT NULL AND (last_drop_date IS NULL OR last_add_date > last_drop_date) THEN 'A'
+ WHEN last_drop_date IS NOT NULL AND (last_add_date IS NULL OR last_drop_date > last_add_date) THEN 'D'
+ ELSE 'M'
+ END)
+ || '|' || column_adds
+ || '|' || column_drops
+ || '|' || column_mods
|| '|' || window_start::TEXT
AS result_signal,
0 AS result_code,
diff --git a/testgen/template/dbsetup_test_types/test_types_Street_Addr_Pattern.yaml b/testgen/template/dbsetup_test_types/test_types_Street_Addr_Pattern.yaml
index c5f9a5c6..31004340 100644
--- a/testgen/template/dbsetup_test_types/test_types_Street_Addr_Pattern.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Street_Addr_Pattern.yaml
@@ -101,6 +101,22 @@ test_types:
test_operator: <
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8026'
+ test_type: Street_Addr_Pattern
+ sql_flavor: oracle
+ measure: |-
+ 100.0*SUM(CASE WHEN REGEXP_LIKE(TO_CHAR({COLUMN_NAME}), '^[0-9]{1,5}[a-zA-Z]?[[:space:]][[:alnum:]_]{1,5}\.?[[:space:]]?[[:alnum:]_]*[[:space:]]?[[:alnum:]_]*[[:space:]][a-zA-Z]{1,6}\.?[[:space:]]?[0-9]{0,5}[A-Z]{0,1}$') THEN 1 ELSE 0 END) / NULLIF(COUNT({COLUMN_NAME}), 0)
+ test_operator: <
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8026'
+ test_type: Street_Addr_Pattern
+ sql_flavor: sap_hana
+ measure: |-
+ 100.0*SUM(CASE WHEN TO_VARCHAR({COLUMN_NAME}) LIKE_REGEXPR '^[0-9]{1,5}[a-zA-Z]?[[:space:]][[:alnum:]_]{1,5}\.?[[:space:]]?[[:alnum:]_]*[[:space:]]?[[:alnum:]_]*[[:space:]][a-zA-Z]{1,6}\.?[[:space:]]?[0-9]{0,5}[A-Z]{0,1}$' THEN 1 ELSE 0 END) / NULLIF(COUNT({COLUMN_NAME}), 0)
+ test_operator: <
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1389'
test_id: '1033'
@@ -129,7 +145,7 @@ test_types:
sql_flavor: mssql
lookup_type: null
lookup_query: |-
- SELECT DISTINCT TOP {LIMIT} "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE UPPER("{COLUMN_NAME}") NOT LIKE '[1-9]% [A-Z]% %' AND CHARINDEX(' ', "{COLUMN_NAME}") NOT BETWEEN 2 AND 6 GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;
+ SELECT DISTINCT TOP {LIMIT} "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE UPPER("{COLUMN_NAME}") NOT LIKE '[1-9]% [A-Z]% %' OR CHARINDEX(' ', "{COLUMN_NAME}") NOT BETWEEN 2 AND 6 GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;
error_type: Test Results
- id: '1108'
test_id: '1033'
@@ -163,4 +179,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE REGEXP_LIKE("{COLUMN_NAME}"::VARCHAR, '^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$') != 1 GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT {LIMIT};
error_type: Test Results
+ - id: '8033'
+ test_id: '1033'
+ test_type: Street_Addr_Pattern
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE NOT REGEXP_LIKE(TO_CHAR("{COLUMN_NAME}"), '^[0-9]{1,5}[a-zA-Z]?[[:space:]][[:alnum:]_]{1,5}\.?[[:space:]]?[[:alnum:]_]*[[:space:]]?[[:alnum:]_]*[[:space:]][a-zA-Z]{1,6}\.?[[:space:]]?[0-9]{0,5}[A-Z]{0,1}$') GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8033'
+ test_id: '1033'
+ test_type: Street_Addr_Pattern
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE NOT TO_VARCHAR("{COLUMN_NAME}") LIKE_REGEXPR '^[0-9]{1,5}[a-zA-Z]?[[:space:]][[:alnum:]_]{1,5}\.?[[:space:]]?[[:alnum:]_]*[[:space:]]?[[:alnum:]_]*[[:space:]][a-zA-Z]{1,6}\.?[[:space:]]?[0-9]{0,5}[A-Z]{0,1}$' GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Table_Freshness.yaml b/testgen/template/dbsetup_test_types/test_types_Table_Freshness.yaml
index ed3e6340..27e89cf0 100644
--- a/testgen/template/dbsetup_test_types/test_types_Table_Freshness.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Table_Freshness.yaml
@@ -55,9 +55,9 @@ test_types:
{SKIP_ERRORS} AS skip_errors,
'{INPUT_PARAMETERS}' AS input_parameters,
fingerprint AS result_signal,
- CASE
+ CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
- ELSE 1
+ ELSE 1
END AS result_code,
CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 'No table change detected.'
@@ -68,7 +68,7 @@ test_types:
ELSE 1
END AS result_measure
FROM (
- SELECT {CUSTOM_QUERY} AS fingerprint
+ SELECT TO_HEX(MD5({CUSTOM_QUERY})) AS fingerprint
FROM `{SCHEMA_NAME}.{TABLE_NAME}`
WHERE {SUBSET_CONDITION}
) test;
@@ -88,9 +88,9 @@ test_types:
{SKIP_ERRORS} as skip_errors,
'{INPUT_PARAMETERS}' as input_parameters,
fingerprint as result_signal,
- CASE
+ CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
- ELSE 1
+ ELSE 1
END AS result_code,
CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 'No table change detected.'
@@ -100,7 +100,7 @@ test_types:
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
ELSE 1
END AS result_measure
- FROM ( SELECT {CUSTOM_QUERY} as fingerprint
+ FROM ( SELECT MD5({CUSTOM_QUERY}) as fingerprint
FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
WHERE {SUBSET_CONDITION}
) test;
@@ -120,9 +120,9 @@ test_types:
{SKIP_ERRORS} as skip_errors,
'{INPUT_PARAMETERS}' as input_parameters,
fingerprint as result_signal,
- CASE
+ CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
- ELSE 1
+ ELSE 1
END AS result_code,
CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 'No table change detected.'
@@ -132,7 +132,7 @@ test_types:
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
ELSE 1
END AS result_measure
- FROM ( SELECT {CUSTOM_QUERY} as fingerprint
+ FROM ( SELECT LOWER(CONVERT(VARCHAR(40), HASHBYTES('MD5', CAST({CUSTOM_QUERY} AS VARCHAR(MAX))), 2)) as fingerprint
FROM "{SCHEMA_NAME}"."{TABLE_NAME}" WITH (NOLOCK)
WHERE {SUBSET_CONDITION}
) test;
@@ -152,9 +152,9 @@ test_types:
{SKIP_ERRORS} as skip_errors,
'{INPUT_PARAMETERS}' as input_parameters,
fingerprint as result_signal,
- CASE
+ CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
- ELSE 1
+ ELSE 1
END AS result_code,
CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 'No table change detected.'
@@ -164,7 +164,7 @@ test_types:
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
ELSE 1
END AS result_measure
- FROM ( SELECT {CUSTOM_QUERY} as fingerprint
+ FROM ( SELECT MD5({CUSTOM_QUERY}) as fingerprint
FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
WHERE {SUBSET_CONDITION}
) test;
@@ -184,9 +184,9 @@ test_types:
{SKIP_ERRORS} as skip_errors,
'{INPUT_PARAMETERS}' as input_parameters,
fingerprint as result_signal,
- CASE
+ CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
- ELSE 1
+ ELSE 1
END AS result_code,
CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 'No table change detected.'
@@ -196,7 +196,7 @@ test_types:
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
ELSE 1
END AS result_measure
- FROM ( SELECT {CUSTOM_QUERY} as fingerprint
+ FROM ( SELECT MD5({CUSTOM_QUERY}) as fingerprint
FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
WHERE {SUBSET_CONDITION}
) test;
@@ -216,9 +216,9 @@ test_types:
{SKIP_ERRORS} as skip_errors,
'{INPUT_PARAMETERS}' as input_parameters,
fingerprint as result_signal,
- CASE
+ CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
- ELSE 1
+ ELSE 1
END AS result_code,
CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 'No table change detected.'
@@ -228,7 +228,7 @@ test_types:
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
ELSE 1
END AS result_measure
- FROM ( SELECT {CUSTOM_QUERY} as fingerprint
+ FROM ( SELECT MD5({CUSTOM_QUERY}) as fingerprint
FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
WHERE {SUBSET_CONDITION}
) test;
@@ -248,9 +248,9 @@ test_types:
{SKIP_ERRORS} as skip_errors,
'{INPUT_PARAMETERS}' as input_parameters,
fingerprint as result_signal,
- CASE
+ CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
- ELSE 1
+ ELSE 1
END AS result_code,
CASE
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 'No table change detected.'
@@ -260,7 +260,71 @@ test_types:
WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
ELSE 1
END AS result_measure
- FROM ( SELECT {CUSTOM_QUERY} as fingerprint
+ FROM ( SELECT MD5({CUSTOM_QUERY}) as fingerprint
FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
WHERE {SUBSET_CONDITION}
) test;
+ - id: '8012'
+ test_type: Table_Freshness
+ sql_flavor: oracle
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ fingerprint as result_signal,
+ CASE
+ WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
+ ELSE 1
+ END AS result_code,
+ CASE
+ WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 'No table change detected.'
+ ELSE 'Table change detected.'
+ END AS result_message,
+ CASE
+ WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
+ ELSE 1
+ END AS result_measure
+ FROM ( SELECT LOWER(RAWTOHEX(STANDARD_HASH({CUSTOM_QUERY}, 'MD5'))) as fingerprint
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ ) test
+ - id: '8012'
+ test_type: Table_Freshness
+ sql_flavor: sap_hana
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ fingerprint as result_signal,
+ CASE
+ WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
+ ELSE 1
+ END AS result_code,
+ CASE
+ WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 'No table change detected.'
+ ELSE 'Table change detected.'
+ END AS result_message,
+ CASE
+ WHEN '{LOWER_TOLERANCE}' = 'NULL' OR fingerprint = '{LOWER_TOLERANCE}' THEN 0
+ ELSE 1
+ END AS result_measure
+ FROM ( SELECT LOWER(BINTOHEX(HASH_MD5(TO_BINARY({CUSTOM_QUERY})))) as fingerprint
+ FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}
+ WHERE {SUBSET_CONDITION}
+ ) test
diff --git a/testgen/template/dbsetup_test_types/test_types_Timeframe_Combo_Gain.yaml b/testgen/template/dbsetup_test_types/test_types_Timeframe_Combo_Gain.yaml
index d4d1152b..c03bfd5f 100644
--- a/testgen/template/dbsetup_test_types/test_types_Timeframe_Combo_Gain.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Timeframe_Combo_Gain.yaml
@@ -158,6 +158,46 @@ test_types:
GROUP BY {COLUMN_NAME_NO_QUOTES}
LIMIT {LIMIT};
error_type: Test Results
+ - id: '8508'
+ test_id: '1508'
+ test_type: Timeframe_Combo_Gain
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") - 2 * {WINDOW_DAYS}
+ AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") - {WINDOW_DAYS}
+ GROUP BY {COLUMN_NAME_NO_QUOTES}
+ MINUS
+ SELECT {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") - {WINDOW_DAYS}
+ GROUP BY {COLUMN_NAME_NO_QUOTES}
+ FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8508'
+ test_id: '1508'
+ test_type: Timeframe_Combo_Gain
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), -2 * {WINDOW_DAYS})
+ AND {WINDOW_DATE_COLUMN} < ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), -{WINDOW_DAYS})
+ GROUP BY {COLUMN_NAME_NO_QUOTES}
+ EXCEPT
+ SELECT {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), -{WINDOW_DAYS})
+ GROUP BY {COLUMN_NAME_NO_QUOTES}
+ LIMIT {LIMIT}
+ error_type: Test Results
test_templates:
- id: '2507'
test_type: Timeframe_Combo_Gain
@@ -479,3 +519,85 @@ test_types:
AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}))
GROUP BY {COLUMN_NAME_NO_QUOTES}
) test;
+ - id: '8007'
+ test_type: Timeframe_Combo_Gain
+ sql_flavor: oracle
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS VARCHAR2(20)) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM (
+ SELECT {COLUMN_NAME_NO_QUOTES}
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}") - 2 * {WINDOW_DAYS}
+ AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}") - {WINDOW_DAYS}
+ GROUP BY {COLUMN_NAME_NO_QUOTES}
+ MINUS
+ SELECT {COLUMN_NAME_NO_QUOTES}
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}") - {WINDOW_DAYS}
+ GROUP BY {COLUMN_NAME_NO_QUOTES}
+ ) test
+ - id: '8007'
+ test_type: Timeframe_Combo_Gain
+ sql_flavor: sap_hana
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS VARCHAR(20)) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM (
+ SELECT {COLUMN_NAME_NO_QUOTES}
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}"), -2 * {WINDOW_DAYS})
+ AND {WINDOW_DATE_COLUMN} < ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}"), -{WINDOW_DAYS})
+ GROUP BY {COLUMN_NAME_NO_QUOTES}
+ EXCEPT
+ SELECT {COLUMN_NAME_NO_QUOTES}
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}"), -{WINDOW_DAYS})
+ GROUP BY {COLUMN_NAME_NO_QUOTES}
+ ) test
diff --git a/testgen/template/dbsetup_test_types/test_types_Timeframe_Combo_Match.yaml b/testgen/template/dbsetup_test_types/test_types_Timeframe_Combo_Match.yaml
index 24b17cc4..1c9851dc 100644
--- a/testgen/template/dbsetup_test_types/test_types_Timeframe_Combo_Match.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Timeframe_Combo_Match.yaml
@@ -273,6 +273,72 @@ test_types:
LIMIT {LIMIT_2}
)
error_type: Test Results
+ - id: '8509'
+ test_id: '1509'
+ test_type: Timeframe_Combo_Match
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT * FROM (
+ SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") - {WINDOW_DAYS}
+ MINUS
+ SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") - 2 * {WINDOW_DAYS}
+ AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") - {WINDOW_DAYS}
+ ) WHERE ROWNUM <= {LIMIT_2}
+ UNION ALL
+ SELECT * FROM (
+ SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") - 2 * {WINDOW_DAYS}
+ AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") - {WINDOW_DAYS}
+ MINUS
+ SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") - {WINDOW_DAYS}
+ ) WHERE ROWNUM <= {LIMIT_2}
+ error_type: Test Results
+ - id: '8509'
+ test_id: '1509'
+ test_type: Timeframe_Combo_Match
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ (
+ SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), -{WINDOW_DAYS})
+ EXCEPT
+ SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), -2 * {WINDOW_DAYS})
+ AND {WINDOW_DATE_COLUMN} < ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), -{WINDOW_DAYS})
+ LIMIT {LIMIT_2}
+ )
+ UNION ALL
+ (
+ SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), -2 * {WINDOW_DAYS})
+ AND {WINDOW_DATE_COLUMN} < ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), -{WINDOW_DAYS})
+ EXCEPT
+ SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), -{WINDOW_DAYS})
+ LIMIT {LIMIT_2}
+ )
+ error_type: Test Results
test_templates:
- id: '2508'
test_type: Timeframe_Combo_Match
@@ -706,3 +772,111 @@ test_types:
AND {WINDOW_DATE_COLUMN} >= DATEADD("day", - {WINDOW_DAYS}, (SELECT MAX({WINDOW_DATE_COLUMN}) FROM {QUOTE}{SCHEMA_NAME}{QUOTE}.{QUOTE}{TABLE_NAME}{QUOTE}))
)
) test;
+ - id: '8008'
+ test_type: Timeframe_Combo_Match
+ sql_flavor: oracle
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS VARCHAR2(20)) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM (
+ (
+ SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}") - {WINDOW_DAYS}
+ MINUS
+ SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}") - 2 * {WINDOW_DAYS}
+ AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}") - {WINDOW_DAYS}
+ )
+ UNION ALL
+ (
+ SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}") - 2 * {WINDOW_DAYS}
+ AND {WINDOW_DATE_COLUMN} < (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}") - {WINDOW_DAYS}
+ MINUS
+ SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= (SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}") - {WINDOW_DAYS}
+ )
+ ) test
+ - id: '8008'
+ test_type: Timeframe_Combo_Match
+ sql_flavor: sap_hana
+ template: |-
+ SELECT '{TEST_TYPE}' as test_type,
+ '{TEST_DEFINITION_ID}' as test_definition_id,
+ '{TEST_SUITE_ID}' as test_suite_id,
+ '{TEST_RUN_ID}' as test_run_id,
+ '{RUN_DATE}' as test_time,
+ '{SCHEMA_NAME}' as schema_name,
+ '{TABLE_NAME}' as table_name,
+ '{COLUMN_NAME_NO_QUOTES}' as column_names,
+ '{SKIP_ERRORS}' as threshold_value,
+ {SKIP_ERRORS} as skip_errors,
+ '{INPUT_PARAMETERS}' as input_parameters,
+ NULL as result_signal,
+ CASE WHEN COUNT (*) > {SKIP_ERRORS} THEN 0 ELSE 1 END as result_code,
+ CASE
+ WHEN COUNT(*) > 0 THEN
+ CAST(COUNT(*) AS VARCHAR(20)) || ' error(s) identified, ' ||
+ CASE
+ WHEN COUNT(*) > {SKIP_ERRORS} THEN 'exceeding limit of '
+ ELSE 'within limit of '
+ END || '{SKIP_ERRORS}.'
+ ELSE 'No errors found.'
+ END AS result_message,
+ COUNT(*) as result_measure
+ FROM (
+ (
+ SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}"), -{WINDOW_DAYS})
+ EXCEPT
+ SELECT 'Prior Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}"), -2 * {WINDOW_DAYS})
+ AND {WINDOW_DATE_COLUMN} < ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}"), -{WINDOW_DAYS})
+ )
+ UNION ALL
+ (
+ SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}"), -2 * {WINDOW_DAYS})
+ AND {WINDOW_DATE_COLUMN} < ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}"), -{WINDOW_DAYS})
+ EXCEPT
+ SELECT 'Latest Timeframe' as missing_from, {COLUMN_NAME_NO_QUOTES}
+ FROM "{SCHEMA_NAME}"."{TABLE_NAME}"
+ WHERE {SUBSET_CONDITION}
+ AND {WINDOW_DATE_COLUMN} >= ADD_DAYS((SELECT MAX({WINDOW_DATE_COLUMN}) FROM "{SCHEMA_NAME}"."{TABLE_NAME}"), -{WINDOW_DAYS})
+ )
+ ) test
diff --git a/testgen/template/dbsetup_test_types/test_types_US_State.yaml b/testgen/template/dbsetup_test_types/test_types_US_State.yaml
index d663db1f..21acdc38 100644
--- a/testgen/template/dbsetup_test_types/test_types_US_State.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_US_State.yaml
@@ -101,6 +101,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8027'
+ test_type: US_State
+ sql_flavor: oracle
+ measure: |-
+ SUM(CASE WHEN NULLIF({COLUMN_NAME}, '') NOT IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA') THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8027'
+ test_type: US_State
+ sql_flavor: sap_hana
+ measure: |-
+ SUM(CASE WHEN NULLIF({COLUMN_NAME}, '') NOT IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA') THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1390'
test_id: '1036'
@@ -162,4 +178,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE NULLIF("{COLUMN_NAME}", '') NOT IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA') GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT};
error_type: Test Results
+ - id: '8036'
+ test_id: '1036'
+ test_type: US_State
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE "{COLUMN_NAME}" IS NOT NULL AND "{COLUMN_NAME}" NOT IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA') GROUP BY "{COLUMN_NAME}" FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8036'
+ test_id: '1036'
+ test_type: US_State
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+      SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE NULLIF("{COLUMN_NAME}", '') NOT IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA') GROUP BY "{COLUMN_NAME}" LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Unique.yaml b/testgen/template/dbsetup_test_types/test_types_Unique.yaml
index a084f307..d02a9e38 100644
--- a/testgen/template/dbsetup_test_types/test_types_Unique.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Unique.yaml
@@ -101,6 +101,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8028'
+ test_type: Unique
+ sql_flavor: oracle
+ measure: |-
+ COUNT(*) - COUNT(DISTINCT {COLUMN_NAME})
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8028'
+ test_type: Unique
+ sql_flavor: sap_hana
+ measure: |-
+ COUNT(*) - COUNT(DISTINCT {COLUMN_NAME})
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1391'
test_id: '1034'
@@ -163,4 +179,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" HAVING COUNT(*) > 1 ORDER BY COUNT(*) DESC LIMIT {LIMIT};
error_type: Test Results
+ - id: '8028'
+ test_id: '1034'
+ test_type: Unique
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" HAVING COUNT(*) > 1 ORDER BY COUNT(*) DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8028'
+ test_id: '1034'
+ test_type: Unique
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" HAVING COUNT(*) > 1 ORDER BY COUNT(*) DESC LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Unique_Pct.yaml b/testgen/template/dbsetup_test_types/test_types_Unique_Pct.yaml
index 4f79e0dd..77f8aae5 100644
--- a/testgen/template/dbsetup_test_types/test_types_Unique_Pct.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Unique_Pct.yaml
@@ -101,6 +101,22 @@ test_types:
test_operator: '>='
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8029'
+ test_type: Unique_Pct
+ sql_flavor: oracle
+ measure: |-
+ ABS(2.0 * ASIN(SQRT(CAST({BASELINE_UNIQUE_CT} AS NUMBER) / CAST({BASELINE_VALUE_CT} AS NUMBER))) - 2 * ASIN(SQRT(CAST(COUNT(DISTINCT {COLUMN_NAME}) AS NUMBER) / CAST(NULLIF(COUNT({COLUMN_NAME}), 0) AS NUMBER))))
+ test_operator: '>='
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8029'
+ test_type: Unique_Pct
+ sql_flavor: sap_hana
+ measure: |-
+ ABS(2.0 * ASIN(SQRT(CAST({BASELINE_UNIQUE_CT} AS DECIMAL) / CAST({BASELINE_VALUE_CT} AS DECIMAL))) - 2 * ASIN(SQRT(CAST(COUNT(DISTINCT {COLUMN_NAME}) AS DECIMAL) / CAST(NULLIF(COUNT({COLUMN_NAME}), 0) AS DECIMAL))))
+ test_operator: '>='
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1392'
test_id: '1035'
@@ -162,4 +178,20 @@ test_types:
lookup_query: |-
SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT {LIMIT};
error_type: Test Results
+ - id: '8029'
+ test_id: '1035'
+ test_type: Unique_Pct
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8029'
+ test_id: '1035'
+ test_type: Unique_Pct
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Valid_Characters.yaml b/testgen/template/dbsetup_test_types/test_types_Valid_Characters.yaml
index e2e2f9ce..09d90d0a 100644
--- a/testgen/template/dbsetup_test_types/test_types_Valid_Characters.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Valid_Characters.yaml
@@ -101,6 +101,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8036'
+ test_type: Valid_Characters
+ sql_flavor: oracle
+ measure: |-
+ SUM(CASE WHEN TRANSLATE({COLUMN_NAME}, 'X' || UNISTR('\00A0') || UNISTR('\200B') || UNISTR('\FEFF') || UNISTR('\202F') || UNISTR('\2009') || UNISTR('\3000') || UNISTR('\200C'), 'XXXXXXXX') <> {COLUMN_NAME} OR {COLUMN_NAME} LIKE ' %' OR {COLUMN_NAME} LIKE '''%''' OR {COLUMN_NAME} LIKE '"%"' THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8036'
+ test_type: Valid_Characters
+ sql_flavor: sap_hana
+ measure: |-
+ SUM(CASE WHEN REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE({COLUMN_NAME}, NCHAR(160), ''), NCHAR(8203), ''), NCHAR(65279), ''), NCHAR(8239), ''), NCHAR(8201), ''), NCHAR(12288), ''), NCHAR(8204), '') <> {COLUMN_NAME} OR {COLUMN_NAME} LIKE ' %' OR {COLUMN_NAME} LIKE '''%''' OR {COLUMN_NAME} LIKE '"%"' THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1397'
test_id: '1043'
@@ -166,4 +182,20 @@ test_types:
lookup_query: |-
SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE TRANSLATE("{COLUMN_NAME}", CHAR(160) || CHAR(8203) || CHAR(65279) || CHAR(8239) || CHAR(8201) || CHAR(12288) || CHAR(8204), 'XXXXXXX') <> "{COLUMN_NAME}" OR "{COLUMN_NAME}" LIKE ' %' OR "{COLUMN_NAME}" LIKE '''%''' OR "{COLUMN_NAME}" LIKE '"%"' ORDER BY record_ct DESC LIMIT {LIMIT};
error_type: Test Results
+ - id: '8043'
+ test_id: '1043'
+ test_type: Valid_Characters
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE TRANSLATE("{COLUMN_NAME}", 'X' || UNISTR('\00A0') || UNISTR('\200B') || UNISTR('\FEFF') || UNISTR('\202F') || UNISTR('\2009') || UNISTR('\3000') || UNISTR('\200C'), 'XXXXXXXX') <> "{COLUMN_NAME}" OR "{COLUMN_NAME}" LIKE ' %' OR "{COLUMN_NAME}" LIKE '''%''' OR "{COLUMN_NAME}" LIKE '"%"' GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8043'
+ test_id: '1043'
+ test_type: Valid_Characters
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", NCHAR(160), ''), NCHAR(8203), ''), NCHAR(65279), ''), NCHAR(8239), ''), NCHAR(8201), ''), NCHAR(12288), ''), NCHAR(8204), '') <> "{COLUMN_NAME}" OR "{COLUMN_NAME}" LIKE ' %' OR "{COLUMN_NAME}" LIKE '''%''' OR "{COLUMN_NAME}" LIKE '"%"' GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Valid_Month.yaml b/testgen/template/dbsetup_test_types/test_types_Valid_Month.yaml
index 07dd037f..343587b7 100644
--- a/testgen/template/dbsetup_test_types/test_types_Valid_Month.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Valid_Month.yaml
@@ -100,5 +100,21 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8033'
+ test_type: Valid_Month
+ sql_flavor: oracle
+ measure: |-
+ SUM(CASE WHEN {COLUMN_NAME} IS NOT NULL AND {COLUMN_NAME} NOT IN ({BASELINE_VALUE}) THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8033'
+ test_type: Valid_Month
+ sql_flavor: sap_hana
+ measure: |-
+ SUM(CASE WHEN {COLUMN_NAME} IS NOT NULL AND {COLUMN_NAME} NOT IN ({BASELINE_VALUE}) THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups: []
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Valid_US_Zip.yaml b/testgen/template/dbsetup_test_types/test_types_Valid_US_Zip.yaml
index 29e12359..a42d0aa2 100644
--- a/testgen/template/dbsetup_test_types/test_types_Valid_US_Zip.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Valid_US_Zip.yaml
@@ -99,6 +99,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8034'
+ test_type: Valid_US_Zip
+ sql_flavor: oracle
+ measure: |-
+ SUM(CASE WHEN TRANSLATE({COLUMN_NAME},'012345678','999999999') NOT IN ('99999', '999999999', '99999-9999') THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8034'
+ test_type: Valid_US_Zip
+ sql_flavor: sap_hana
+ measure: |-
+ SUM(CASE WHEN REPLACE_REGEXPR('[0-9]' IN {COLUMN_NAME} WITH '9') NOT IN ('99999', '999999999', '99999-9999') THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1398'
test_id: '1044'
@@ -161,4 +177,20 @@ test_types:
lookup_query: |-
SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE TRANSLATE("{COLUMN_NAME}",'012345678','999999999') NOT IN ('99999', '999999999', '99999-9999') GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC LIMIT {LIMIT};
error_type: Test Results
+ - id: '8044'
+ test_id: '1044'
+ test_type: Valid_US_Zip
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE TRANSLATE("{COLUMN_NAME}",'012345678','999999999') NOT IN ('99999', '999999999', '99999-9999') GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8044'
+ test_id: '1044'
+ test_type: Valid_US_Zip
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE REPLACE_REGEXPR('[0-9]' IN "{COLUMN_NAME}" WITH '9') NOT IN ('99999', '999999999', '99999-9999') GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Valid_US_Zip3.yaml b/testgen/template/dbsetup_test_types/test_types_Valid_US_Zip3.yaml
index f2611807..31a6d9ab 100644
--- a/testgen/template/dbsetup_test_types/test_types_Valid_US_Zip3.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Valid_US_Zip3.yaml
@@ -100,6 +100,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8035'
+ test_type: Valid_US_Zip3
+ sql_flavor: oracle
+ measure: |-
+ SUM(CASE WHEN TRANSLATE({COLUMN_NAME},'012345678','999999999') <> '999' THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8035'
+ test_type: Valid_US_Zip3
+ sql_flavor: sap_hana
+ measure: |-
+ SUM(CASE WHEN REPLACE_REGEXPR('[0-9]' IN {COLUMN_NAME} WITH '9') <> '999' THEN 1 ELSE 0 END)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1399'
test_id: '1045'
@@ -162,4 +178,20 @@ test_types:
lookup_query: |-
SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE TRANSLATE("{COLUMN_NAME}",'012345678','999999999') <> '999' GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC LIMIT {LIMIT};
error_type: Test Results
+ - id: '8045'
+ test_id: '1045'
+ test_type: Valid_US_Zip3
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE TRANSLATE("{COLUMN_NAME}",'012345678','999999999') <> '999' GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8045'
+ test_id: '1045'
+ test_type: Valid_US_Zip3
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT "{COLUMN_NAME}", COUNT(*) AS record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" WHERE REPLACE_REGEXPR('[0-9]' IN "{COLUMN_NAME}" WITH '9') <> '999' GROUP BY "{COLUMN_NAME}" ORDER BY record_ct DESC LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Variability_Decrease.yaml b/testgen/template/dbsetup_test_types/test_types_Variability_Decrease.yaml
index 6cab00de..74b91f96 100644
--- a/testgen/template/dbsetup_test_types/test_types_Variability_Decrease.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Variability_Decrease.yaml
@@ -105,6 +105,22 @@ test_types:
test_operator: <
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8032'
+ test_type: Variability_Decrease
+ sql_flavor: oracle
+ measure: |-
+ 100.0*STDDEV(CAST({COLUMN_NAME} AS NUMBER))/CAST({BASELINE_SD} AS NUMBER)
+ test_operator: <
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8032'
+ test_type: Variability_Decrease
+ sql_flavor: sap_hana
+ measure: |-
+ 100.0*STDDEV(CAST({COLUMN_NAME} AS DECIMAL))/CAST({BASELINE_SD} AS DECIMAL)
+ test_operator: <
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1395'
test_id: '1041'
@@ -163,4 +179,20 @@ test_types:
lookup_query: |-
SELECT STDDEV(CAST("{COLUMN_NAME}" AS FLOAT)) as current_standard_deviation FROM "{TARGET_SCHEMA}"."{TABLE_NAME}";
error_type: Test Results
+ - id: '8032'
+ test_id: '1041'
+ test_type: Variability_Decrease
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT STDDEV(CAST("{COLUMN_NAME}" AS NUMBER)) as current_standard_deviation FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ error_type: Test Results
+ - id: '8032'
+ test_id: '1041'
+ test_type: Variability_Decrease
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT STDDEV(CAST("{COLUMN_NAME}" AS DECIMAL)) as current_standard_deviation FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Variability_Increase.yaml b/testgen/template/dbsetup_test_types/test_types_Variability_Increase.yaml
index e05a1234..1992ec41 100644
--- a/testgen/template/dbsetup_test_types/test_types_Variability_Increase.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Variability_Increase.yaml
@@ -109,6 +109,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8031'
+ test_type: Variability_Increase
+ sql_flavor: oracle
+ measure: |-
+ 100.0*STDDEV(CAST({COLUMN_NAME} AS NUMBER))/CAST({BASELINE_SD} AS NUMBER)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8031'
+ test_type: Variability_Increase
+ sql_flavor: sap_hana
+ measure: |-
+ 100.0*STDDEV(CAST({COLUMN_NAME} AS DECIMAL))/CAST({BASELINE_SD} AS DECIMAL)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1394'
test_id: '1040'
@@ -167,4 +183,20 @@ test_types:
lookup_query: |-
SELECT STDDEV(CAST("{COLUMN_NAME}" AS FLOAT)) as current_standard_deviation FROM "{TARGET_SCHEMA}"."{TABLE_NAME}";
error_type: Test Results
+ - id: '8031'
+ test_id: '1040'
+ test_type: Variability_Increase
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT STDDEV(CAST("{COLUMN_NAME}" AS NUMBER)) as current_standard_deviation FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ error_type: Test Results
+ - id: '8031'
+ test_id: '1040'
+ test_type: Variability_Increase
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT STDDEV(CAST("{COLUMN_NAME}" AS DECIMAL)) as current_standard_deviation FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Volume_Trend.yaml b/testgen/template/dbsetup_test_types/test_types_Volume_Trend.yaml
index 3bc15367..e748f130 100644
--- a/testgen/template/dbsetup_test_types/test_types_Volume_Trend.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Volume_Trend.yaml
@@ -20,6 +20,7 @@ test_types:
default_parm_values: null
default_parm_prompts: Record Subset Condition,Lower Bound,Upper Bound,History Lookback
default_parm_help: Condition defining a subset of records in main table
+ default_parm_required: N,N,N,N
default_severity: Fail
run_type: CAT
test_scope: table
@@ -89,6 +90,22 @@ test_types:
test_operator: NOT BETWEEN
test_condition: |-
{LOWER_TOLERANCE} AND {UPPER_TOLERANCE}
+ - id: '2815'
+ test_type: Volume_Trend
+ sql_flavor: oracle
+ measure: |-
+ {CUSTOM_QUERY}
+ test_operator: NOT BETWEEN
+ test_condition: |-
+ {LOWER_TOLERANCE} AND {UPPER_TOLERANCE}
+ - id: '2815'
+ test_type: Volume_Trend
+ sql_flavor: sap_hana
+ measure: |-
+ {CUSTOM_QUERY}
+ test_operator: NOT BETWEEN
+ test_condition: |-
+ {LOWER_TOLERANCE} AND {UPPER_TOLERANCE}
target_data_lookups:
- id: '1477'
test_id: '1513'
@@ -167,4 +184,26 @@ test_types:
{UPPER_TOLERANCE} AS upper_bound
FROM "{TARGET_SCHEMA}"."{TABLE_NAME}";
error_type: Test Results
+ - id: '8015'
+ test_id: '1513'
+ test_type: Volume_Trend
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ SELECT {CUSTOM_QUERY} AS current_count,
+ {LOWER_TOLERANCE} AS lower_bound,
+ {UPPER_TOLERANCE} AS upper_bound
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ error_type: Test Results
+ - id: '8015'
+ test_id: '1513'
+ test_type: Volume_Trend
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ SELECT {CUSTOM_QUERY} AS current_count,
+ {LOWER_TOLERANCE} AS lower_bound,
+ {UPPER_TOLERANCE} AS upper_bound
+ FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbsetup_test_types/test_types_Weekly_Rec_Ct.yaml b/testgen/template/dbsetup_test_types/test_types_Weekly_Rec_Ct.yaml
index 1aff7bb4..3c288eaf 100644
--- a/testgen/template/dbsetup_test_types/test_types_Weekly_Rec_Ct.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Weekly_Rec_Ct.yaml
@@ -41,7 +41,7 @@ test_types:
test_type: Weekly_Rec_Ct
sql_flavor: bigquery
measure: |-
- DATETIME_DIFF(DATETIME_TRUNC(SAFE_CAST(MAX({COLUMN_NAME}) AS DATE), WEEK), DATETIME_TRUNC(SAFE_CAST(MIN({COLUMN_NAME}) AS DATE), WEEK), WEEK) + 1 - COUNT(DISTINCT DATETIME_TRUNC({COLUMN_NAME}, WEEK))
+ DATETIME_DIFF(DATETIME_TRUNC(SAFE_CAST(MAX({COLUMN_NAME}) AS DATE), ISOWEEK), DATETIME_TRUNC(SAFE_CAST(MIN({COLUMN_NAME}) AS DATE), ISOWEEK), ISOWEEK) + 1 - COUNT(DISTINCT DATETIME_TRUNC({COLUMN_NAME}, ISOWEEK))
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -49,7 +49,7 @@ test_types:
test_type: Weekly_Rec_Ct
sql_flavor: databricks
measure: |-
- CAST(<%DATEDIFF_WEEK;MIN({COLUMN_NAME});MAX({COLUMN_NAME})%> + 1 - COUNT(DISTINCT DATE_TRUNC('week', {COLUMN_NAME})) AS INT)
+ MAX(<%DATEDIFF_WEEK;'1800-01-01'::DATE;{COLUMN_NAME}%>) - MIN(<%DATEDIFF_WEEK;'1800-01-01'::DATE;{COLUMN_NAME}%>)+1 - COUNT(DISTINCT <%DATEDIFF_WEEK;'1800-01-01'::DATE;{COLUMN_NAME}%>)
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -57,7 +57,7 @@ test_types:
test_type: Weekly_Rec_Ct
sql_flavor: mssql
measure: |-
- MAX(DATEDIFF(week, CAST('1800-01-01' AS DATE), {COLUMN_NAME})) - MIN(DATEDIFF(week, CAST('1800-01-01' AS DATE), {COLUMN_NAME}))+1 - COUNT(DISTINCT DATEDIFF(week, CAST('1800-01-01' AS DATE), {COLUMN_NAME}))
+ MAX(DATEDIFF(week, CAST('1800-01-01' AS DATE), DATEADD(day, -1, {COLUMN_NAME}))) - MIN(DATEDIFF(week, CAST('1800-01-01' AS DATE), DATEADD(day, -1, {COLUMN_NAME})))+1 - COUNT(DISTINCT DATEDIFF(week, CAST('1800-01-01' AS DATE), DATEADD(day, -1, {COLUMN_NAME})))
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -73,7 +73,7 @@ test_types:
test_type: Weekly_Rec_Ct
sql_flavor: redshift
measure: |-
- MAX(DATEDIFF(week, '1800-01-01'::DATE, {COLUMN_NAME})) - MIN(DATEDIFF(week, '1800-01-01'::DATE, {COLUMN_NAME}))+1 - COUNT(DISTINCT DATEDIFF(week, '1800-01-01'::DATE, {COLUMN_NAME}))
+ MAX(<%DATEDIFF_WEEK;'1800-01-01'::DATE;{COLUMN_NAME}%>) - MIN(<%DATEDIFF_WEEK;'1800-01-01'::DATE;{COLUMN_NAME}%>)+1 - COUNT(DISTINCT <%DATEDIFF_WEEK;'1800-01-01'::DATE;{COLUMN_NAME}%>)
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -81,7 +81,7 @@ test_types:
test_type: Weekly_Rec_Ct
sql_flavor: redshift_spectrum
measure: |-
- MAX(DATEDIFF(week, '1800-01-01'::DATE, {COLUMN_NAME})) - MIN(DATEDIFF(week, '1800-01-01'::DATE, {COLUMN_NAME}))+1 - COUNT(DISTINCT DATEDIFF(week, '1800-01-01'::DATE, {COLUMN_NAME}))
+ MAX(<%DATEDIFF_WEEK;'1800-01-01'::DATE;{COLUMN_NAME}%>) - MIN(<%DATEDIFF_WEEK;'1800-01-01'::DATE;{COLUMN_NAME}%>)+1 - COUNT(DISTINCT <%DATEDIFF_WEEK;'1800-01-01'::DATE;{COLUMN_NAME}%>)
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -89,7 +89,7 @@ test_types:
test_type: Weekly_Rec_Ct
sql_flavor: snowflake
measure: |-
- MAX(DATEDIFF(week, '1800-01-01'::DATE, {COLUMN_NAME})) - MIN(DATEDIFF(week, '1800-01-01'::DATE, {COLUMN_NAME}))+1 - COUNT(DISTINCT DATEDIFF(week, '1800-01-01'::DATE, {COLUMN_NAME}))
+ MAX(<%DATEDIFF_WEEK;'1800-01-01'::DATE;{COLUMN_NAME}%>) - MIN(<%DATEDIFF_WEEK;'1800-01-01'::DATE;{COLUMN_NAME}%>)+1 - COUNT(DISTINCT <%DATEDIFF_WEEK;'1800-01-01'::DATE;{COLUMN_NAME}%>)
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
@@ -101,6 +101,22 @@ test_types:
test_operator: '>'
test_condition: |-
{THRESHOLD_VALUE}
+ - id: '8030'
+ test_type: Weekly_Rec_Ct
+ sql_flavor: oracle
+ measure: |-
+ MAX(<%DATEDIFF_WEEK;DATE '1800-01-01';{COLUMN_NAME}%>) - MIN(<%DATEDIFF_WEEK;DATE '1800-01-01';{COLUMN_NAME}%>)+1 - COUNT(DISTINCT <%DATEDIFF_WEEK;DATE '1800-01-01';{COLUMN_NAME}%>)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
+ - id: '8030'
+ test_type: Weekly_Rec_Ct
+ sql_flavor: sap_hana
+ measure: |-
+ MAX(<%DATEDIFF_WEEK;DATE '1800-01-01';{COLUMN_NAME}%>) - MIN(<%DATEDIFF_WEEK;DATE '1800-01-01';{COLUMN_NAME}%>)+1 - COUNT(DISTINCT <%DATEDIFF_WEEK;DATE '1800-01-01';{COLUMN_NAME}%>)
+ test_operator: '>'
+ test_condition: |-
+ {THRESHOLD_VALUE}
target_data_lookups:
- id: '1393'
test_id: '1037'
@@ -112,14 +128,14 @@ test_types:
SELECT week_start AS all_dates
FROM UNNEST(
GENERATE_DATE_ARRAY(
- DATE_TRUNC((SELECT MIN(CAST(`{COLUMN_NAME}` AS DATE)) FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`), WEEK),
- DATE_TRUNC((SELECT MAX(CAST(`{COLUMN_NAME}` AS DATE)) FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`), WEEK),
+ DATE_TRUNC((SELECT MIN(CAST(`{COLUMN_NAME}` AS DATE)) FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`), ISOWEEK),
+ DATE_TRUNC((SELECT MAX(CAST(`{COLUMN_NAME}` AS DATE)) FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`), ISOWEEK),
INTERVAL 7 DAY
)
) AS week_start
),
existing_periods AS (
- SELECT DISTINCT DATE_TRUNC(CAST(`{COLUMN_NAME}` AS DATE), WEEK) AS period, COUNT(1) AS period_count
+ SELECT DISTINCT DATE_TRUNC(CAST(`{COLUMN_NAME}` AS DATE), ISOWEEK) AS period, COUNT(1) AS period_count
FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`
GROUP BY period
),
@@ -163,20 +179,20 @@ test_types:
All_Nums as (select row_number() over(order by C) as Number from Pass4),
tally as (SELECT Number FROM All_Nums WHERE Number <= 45000),
- date_range as (SELECT CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, MIN("{COLUMN_NAME}")), 0) AS DATE) AS min_period,
- CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, MAX("{COLUMN_NAME}")), 0) AS DATE) AS max_period,
+ date_range as (SELECT CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, DATEADD(day, -1, MIN("{COLUMN_NAME}"))), 0) AS DATE) AS min_period,
+ CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, DATEADD(day, -1, MAX("{COLUMN_NAME}"))), 0) AS DATE) AS max_period,
DATEDIFF(WEEK,
- CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, MIN("{COLUMN_NAME}")), 0) AS DATE),
- CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, MAX("{COLUMN_NAME}")), 0) AS DATE) ) + 1 as period_ct
+ CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, DATEADD(day, -1, MIN("{COLUMN_NAME}"))), 0) AS DATE),
+ CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, DATEADD(day, -1, MAX("{COLUMN_NAME}"))), 0) AS DATE) ) + 1 as period_ct
FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" ),
check_periods as ( SELECT d.min_period, d.max_period, t.number,
DATEADD(WEEK, -(t.number - 1), d.max_period) AS check_period
FROM date_range d
INNER JOIN tally t
ON (d.period_ct >= t.number) ),
- data_by_period as (SELECT CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, "{COLUMN_NAME}"), 0) AS DATE) as data_period, COUNT(*) as record_ct
+ data_by_period as (SELECT CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, DATEADD(day, -1, "{COLUMN_NAME}")), 0) AS DATE) as data_period, COUNT(*) as record_ct
FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
- GROUP BY CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, "{COLUMN_NAME}"), 0) AS DATE) ),
+ GROUP BY CAST(DATEADD(WEEK, DATEDIFF(WEEK, 0, DATEADD(day, -1, "{COLUMN_NAME}")), 0) AS DATE) ),
data_by_prd_with_prior_next as (SELECT check_period,
RANK() OVER (ORDER BY check_period DESC) as ranked,
ISNULL(d.record_ct, 0) as record_ct,
@@ -185,15 +201,13 @@ test_types:
FROM check_periods c
LEFT JOIN data_by_period d
ON (c.check_period = d.data_period) )
- SELECT TOP {LIMIT} check_period, record_ct,
+ SELECT TOP {LIMIT} check_period AS missing_period, record_ct,
CASE
WHEN record_ct = 0 THEN 'MISSING'
ELSE 'Present'
END as status
FROM data_by_prd_with_prior_next
WHERE record_ct = 0
- OR last_record_ct = 0
- OR next_record_ct = 0
ORDER BY check_period DESC;
error_type: Test Results
- id: '1112'
@@ -228,4 +242,20 @@ test_types:
lookup_query: |-
WITH RECURSIVE daterange(all_dates) AS (SELECT DATE_TRUNC('week',MIN("{COLUMN_NAME}")) :: DATE AS all_dates FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" UNION ALL SELECT (d.all_dates + INTERVAL '1 week' ) :: DATE AS all_dates FROM daterange d WHERE d.all_dates < (SELECT DATE_TRUNC('week', MAX("{COLUMN_NAME}")) :: DATE FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") ), existing_periods AS ( SELECT DISTINCT DATE_TRUNC('week',"{COLUMN_NAME}") :: DATE AS period, COUNT(1) as period_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY DATE_TRUNC('week',"{COLUMN_NAME}") :: DATE ) SELECT p.missing_period, p.prior_available_week, e.period_count as prior_available_week_count, p.next_available_week, f.period_count as next_available_week_count FROM( SELECT d.all_dates as missing_period, MAX(b.period) AS prior_available_week, MIN(c.period) AS next_available_week FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates ) p LEFT JOIN existing_periods e ON (p.prior_available_week = e.period) LEFT JOIN existing_periods f ON (p.next_available_week = f.period) ORDER BY p.missing_period LIMIT {LIMIT};
error_type: Test Results
+ - id: '8037'
+ test_id: '1037'
+ test_type: Weekly_Rec_Ct
+ sql_flavor: oracle
+ lookup_type: null
+ lookup_query: |-
+ WITH daterange AS (SELECT (SELECT TRUNC(MIN("{COLUMN_NAME}"), 'IW') FROM "{TARGET_SCHEMA}"."{TABLE_NAME}") + (LEVEL - 1) * 7 AS all_dates FROM DUAL CONNECT BY LEVEL <= CEIL((TRUNC((SELECT MAX("{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}")) - TRUNC((SELECT MIN("{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"))) / 7) + 1), existing_periods AS (SELECT DISTINCT TRUNC("{COLUMN_NAME}", 'IW') AS period, COUNT(1) AS period_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY TRUNC("{COLUMN_NAME}", 'IW')) SELECT p.missing_period, p.prior_available_week, e.period_count AS prior_available_week_count, p.next_available_week, f.period_count AS next_available_week_count FROM (SELECT d.all_dates AS missing_period, MAX(b.period) AS prior_available_week, MIN(c.period) AS next_available_week FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates) p LEFT JOIN existing_periods e ON (p.prior_available_week = e.period) LEFT JOIN existing_periods f ON (p.next_available_week = f.period) ORDER BY p.missing_period FETCH FIRST {LIMIT} ROWS ONLY
+ error_type: Test Results
+ - id: '8037'
+ test_id: '1037'
+ test_type: Weekly_Rec_Ct
+ sql_flavor: sap_hana
+ lookup_type: null
+ lookup_query: |-
+ WITH Pass0 AS (SELECT 1 C FROM DUMMY UNION ALL SELECT 1 FROM DUMMY), Pass1 AS (SELECT 1 C FROM Pass0 A, Pass0 B), Pass2 AS (SELECT 1 C FROM Pass1 A, Pass1 B), Pass3 AS (SELECT 1 C FROM Pass2 A, Pass2 B), nums AS (SELECT ROW_NUMBER() OVER (ORDER BY C) - 1 AS rn FROM Pass3), bounds AS (SELECT ADD_DAYS(CAST(MIN("{COLUMN_NAME}") AS DATE), -WEEKDAY(CAST(MIN("{COLUMN_NAME}") AS DATE))) AS min_week, ADD_DAYS(CAST(MAX("{COLUMN_NAME}") AS DATE), -WEEKDAY(CAST(MAX("{COLUMN_NAME}") AS DATE))) AS max_week FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"), daterange AS (SELECT ADD_DAYS(b.min_week, n.rn * 7) AS all_dates FROM bounds b, nums n WHERE ADD_DAYS(b.min_week, n.rn * 7) <= b.max_week), existing_periods AS (SELECT DISTINCT ADD_DAYS(CAST("{COLUMN_NAME}" AS DATE), -WEEKDAY(CAST("{COLUMN_NAME}" AS DATE))) AS period, COUNT(1) AS period_count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY ADD_DAYS(CAST("{COLUMN_NAME}" AS DATE), -WEEKDAY(CAST("{COLUMN_NAME}" AS DATE)))) SELECT p.missing_period, p.prior_available_week, e.period_count AS prior_available_week_count, p.next_available_week, f.period_count AS next_available_week_count FROM (SELECT d.all_dates AS missing_period, MAX(b.period) AS prior_available_week, MIN(c.period) AS next_available_week FROM daterange d LEFT JOIN existing_periods a ON d.all_dates = a.period LEFT JOIN existing_periods b ON b.period < d.all_dates LEFT JOIN existing_periods c ON c.period > d.all_dates WHERE a.period IS NULL AND d.all_dates BETWEEN b.period AND c.period GROUP BY d.all_dates) p LEFT JOIN existing_periods e ON (p.prior_available_week = e.period) LEFT JOIN existing_periods f ON (p.next_available_week = f.period) ORDER BY p.missing_period LIMIT {LIMIT}
+ error_type: Test Results
test_templates: []
diff --git a/testgen/template/dbupgrade/0174_incremental_upgrade.sql b/testgen/template/dbupgrade/0174_incremental_upgrade.sql
new file mode 100644
index 00000000..954c7015
--- /dev/null
+++ b/testgen/template/dbupgrade/0174_incremental_upgrade.sql
@@ -0,0 +1,60 @@
+SET SEARCH_PATH TO {SCHEMA_NAME};
+
+-- =============================================================================
+-- Create project_memberships table
+-- =============================================================================
+
+CREATE TABLE IF NOT EXISTS project_memberships (
+ id UUID DEFAULT gen_random_uuid()
+ CONSTRAINT pk_project_memberships_id
+ PRIMARY KEY,
+ user_id UUID NOT NULL
+ CONSTRAINT fk_project_memberships_auth_users
+ REFERENCES auth_users(id)
+ ON DELETE CASCADE,
+ project_code VARCHAR(30) NOT NULL
+ CONSTRAINT fk_project_memberships_projects
+ REFERENCES projects(project_code)
+ ON DELETE CASCADE,
+ role VARCHAR(20) NOT NULL,
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+
+ CONSTRAINT uq_project_memberships_user_project
+ UNIQUE (user_id, project_code)
+);
+
+CREATE INDEX IF NOT EXISTS ix_pm_user_id ON project_memberships(user_id);
+CREATE INDEX IF NOT EXISTS ix_pm_project_code ON project_memberships(project_code);
+CREATE INDEX IF NOT EXISTS ix_pm_role ON project_memberships(role);
+
+-- =============================================================================
+-- Add is_global_admin column to auth_users
+-- =============================================================================
+
+ALTER TABLE auth_users ADD COLUMN IF NOT EXISTS is_global_admin BOOLEAN NOT NULL DEFAULT FALSE;
+
+-- =============================================================================
+-- Set is_global_admin = TRUE for users with role = 'admin'
+-- =============================================================================
+
+UPDATE auth_users SET is_global_admin = TRUE WHERE role = 'admin';
+
+-- =============================================================================
+-- Migrate ALL users to project_memberships
+-- Each user gets their current role in every existing project
+-- =============================================================================
+
+INSERT INTO project_memberships (user_id, project_code, role)
+SELECT
+ u.id AS user_id,
+ p.project_code AS project_code,
+ u.role AS role
+FROM auth_users u
+CROSS JOIN projects p
+ON CONFLICT (user_id, project_code) DO NOTHING;
+
+-- =============================================================================
+-- Drop the role column from auth_users
+-- =============================================================================
+
+ALTER TABLE auth_users DROP COLUMN IF EXISTS role;
diff --git a/testgen/template/dbupgrade/0175_incremental_upgrade.sql b/testgen/template/dbupgrade/0175_incremental_upgrade.sql
new file mode 100644
index 00000000..bd9d16fe
--- /dev/null
+++ b/testgen/template/dbupgrade/0175_incremental_upgrade.sql
@@ -0,0 +1,15 @@
+SET SEARCH_PATH TO {SCHEMA_NAME};
+
+ALTER TABLE test_definitions
+ ADD COLUMN IF NOT EXISTS flagged BOOLEAN DEFAULT FALSE NOT NULL;
+
+CREATE TABLE IF NOT EXISTS test_definition_notes (
+ id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
+ test_definition_id UUID NOT NULL REFERENCES test_definitions ON DELETE CASCADE,
+ detail TEXT NOT NULL,
+ created_by VARCHAR(100) NOT NULL,
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+ updated_at TIMESTAMP
+);
+
+CREATE INDEX IF NOT EXISTS ix_tdn_tdid ON test_definition_notes(test_definition_id, created_at DESC);
diff --git a/testgen/template/dbupgrade/0176_incremental_upgrade.sql b/testgen/template/dbupgrade/0176_incremental_upgrade.sql
new file mode 100644
index 00000000..0c219e4b
--- /dev/null
+++ b/testgen/template/dbupgrade/0176_incremental_upgrade.sql
@@ -0,0 +1,24 @@
+SET SEARCH_PATH TO {SCHEMA_NAME};
+
+CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.datediff(difftype character varying, firstdate timestamp without time zone, seconddate timestamp without time zone) returns bigint
+ language plpgsql
+as
+$$
+ BEGIN
+ RETURN
+ CASE
+ WHEN UPPER(difftype) IN ('DAY', 'DD')
+ THEN DATE_PART('day', seconddate - firstdate)
+ WHEN UPPER(difftype) IN ('WEEK','WK')
+ THEN (DATE_TRUNC('week', seconddate)::DATE - DATE_TRUNC('week', firstdate)::DATE) / 7
+ WHEN UPPER(difftype) IN ('MON', 'MM')
+ THEN 12 * (DATE_PART('year', seconddate) - DATE_PART('year', firstdate))
+ + (DATE_PART('month', seconddate) - DATE_PART('month', firstdate))
+ WHEN UPPER(difftype) IN ('QUARTER', 'QTR')
+ THEN 4 * (DATE_PART('year', seconddate) - DATE_PART('year', firstdate))
+                 + (DATE_PART('qtr', seconddate) - DATE_PART('qtr', firstdate))
+ WHEN UPPER(difftype) IN ('YEAR', 'YY')
+ THEN DATE_PART('year', seconddate) - DATE_PART('year', firstdate)
+ END;
+ END;
+$$;
diff --git a/testgen/template/dbupgrade/0177_incremental_upgrade.sql b/testgen/template/dbupgrade/0177_incremental_upgrade.sql
new file mode 100644
index 00000000..16d6eeab
--- /dev/null
+++ b/testgen/template/dbupgrade/0177_incremental_upgrade.sql
@@ -0,0 +1,3 @@
+SET SEARCH_PATH TO {SCHEMA_NAME};
+
+ALTER TABLE test_types ADD COLUMN IF NOT EXISTS default_parm_required TEXT;
diff --git a/testgen/template/dbupgrade/0178_incremental_upgrade.sql b/testgen/template/dbupgrade/0178_incremental_upgrade.sql
new file mode 100644
index 00000000..9a583577
--- /dev/null
+++ b/testgen/template/dbupgrade/0178_incremental_upgrade.sql
@@ -0,0 +1,13 @@
+SET SEARCH_PATH TO {SCHEMA_NAME};
+
+ALTER TABLE table_groups
+ ADD COLUMN IF NOT EXISTS profile_flag_pii BOOLEAN DEFAULT TRUE,
+ ADD COLUMN IF NOT EXISTS profile_exclude_xde BOOLEAN DEFAULT TRUE;
+
+ALTER TABLE data_column_chars
+ ADD COLUMN IF NOT EXISTS excluded_data_element BOOLEAN,
+ ADD COLUMN IF NOT EXISTS pii_flag VARCHAR(50);
+
+ALTER TABLE target_data_lookups ADD COLUMN IF NOT EXISTS lookup_redactable_columns VARCHAR(100);
+
+ALTER TABLE profile_anomaly_types ADD COLUMN IF NOT EXISTS detail_redactable BOOLEAN DEFAULT FALSE;
diff --git a/testgen/template/dbupgrade/0179_incremental_upgrade.sql b/testgen/template/dbupgrade/0179_incremental_upgrade.sql
new file mode 100644
index 00000000..2bcf9f14
--- /dev/null
+++ b/testgen/template/dbupgrade/0179_incremental_upgrade.sql
@@ -0,0 +1,31 @@
+SET SEARCH_PATH TO {SCHEMA_NAME};
+
+-- Hash existing fingerprint values for Table_Freshness tests
+-- lower_tolerance stores the last computed fingerprint used for comparison
+UPDATE test_definitions
+ SET lower_tolerance = MD5(lower_tolerance)
+ WHERE test_type = 'Table_Freshness'
+ AND lower_tolerance IS NOT NULL
+ AND LENGTH(lower_tolerance) <> 32;
+
+-- Hash existing fingerprint values for Freshness_Trend monitors
+-- baseline_value stores the fingerprint at the last detected table change
+UPDATE test_definitions
+ SET baseline_value = MD5(baseline_value)
+ WHERE test_type = 'Freshness_Trend'
+ AND baseline_value IS NOT NULL
+ AND LENGTH(baseline_value) <> 32;
+
+-- Hash existing result_signal values for Table_Freshness test results
+UPDATE test_results
+ SET result_signal = MD5(result_signal)
+ WHERE test_type = 'Table_Freshness'
+ AND result_signal IS NOT NULL
+ AND LENGTH(result_signal) <> 32;
+
+-- Hash existing result_measure values for Freshness_Trend test results
+UPDATE test_results
+ SET result_measure = MD5(result_measure)
+ WHERE test_type = 'Freshness_Trend'
+ AND result_measure IS NOT NULL
+ AND LENGTH(result_measure) <> 32;
diff --git a/testgen/template/execution/get_missing_freshness_monitors.sql b/testgen/template/execution/get_missing_freshness_monitors.sql
new file mode 100644
index 00000000..a81fab51
--- /dev/null
+++ b/testgen/template/execution/get_missing_freshness_monitors.sql
@@ -0,0 +1,10 @@
+SELECT DISTINCT dtc.table_name
+FROM data_table_chars dtc
+WHERE dtc.table_groups_id = :TABLE_GROUPS_ID ::UUID
+ AND dtc.drop_date IS NULL
+ AND dtc.table_name NOT IN (
+ SELECT table_name
+ FROM test_definitions
+ WHERE test_suite_id = :TEST_SUITE_ID ::UUID
+ AND test_type = 'Freshness_Trend'
+ );
diff --git a/testgen/template/flavors/bigquery/profiling/project_profiling_query.sql b/testgen/template/flavors/bigquery/profiling/project_profiling_query.sql
new file mode 100644
index 00000000..444437c3
--- /dev/null
+++ b/testgen/template/flavors/bigquery/profiling/project_profiling_query.sql
@@ -0,0 +1,262 @@
+WITH target_table AS (
+-- TG-IF do_sample
+ SELECT * FROM `{DATA_SCHEMA}.{DATA_TABLE}` WHERE RAND() * 100 < {SAMPLE_PERCENT_CALC}
+-- TG-ELSE
+ SELECT * FROM `{DATA_SCHEMA}.{DATA_TABLE}`
+-- TG-ENDIF
+)
+SELECT
+ {CONNECTION_ID} AS connection_id,
+ '{PROJECT_CODE}' AS project_code,
+ '{TABLE_GROUPS_ID}' AS table_groups_id,
+ '{DATA_SCHEMA}' AS schema_name,
+ '{RUN_DATE}' AS run_date,
+ '{DATA_TABLE}' AS table_name,
+ {COL_POS} AS position,
+ '{COL_NAME_SANITIZED}' AS column_name,
+ '{COL_TYPE}' AS column_type,
+ '{DB_DATA_TYPE}' AS db_data_type,
+ '{COL_GEN_TYPE}' AS general_type,
+ COUNT(*) AS record_ct,
+ COUNT(`{COL_NAME}`) AS value_ct,
+ COUNT(DISTINCT `{COL_NAME}`) AS distinct_value_ct,
+ SUM(IF(`{COL_NAME}` IS NULL, 1, 0)) AS null_value_ct,
+-- TG-IF is_type_ADN
+ MIN(LENGTH(CAST(`{COL_NAME}` AS STRING))) AS min_length,
+ MAX(LENGTH(CAST(`{COL_NAME}` AS STRING))) AS max_length,
+ AVG(NULLIF(LENGTH(CAST(`{COL_NAME}` AS STRING)), 0)) AS avg_length,
+-- TG-ELSE
+ NULL AS min_length,
+ NULL AS max_length,
+ NULL AS avg_length,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ SUM(
+ CASE
+ WHEN REGEXP_CONTAINS(TRIM(CAST(`{COL_NAME}` AS STRING)), r'^0(\.0*)?$') THEN 1
+ ELSE 0
+ END
+ ) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ CAST(SUM(1 - ABS(SIGN(CAST(`{COL_NAME}` AS NUMERIC)))) AS INT64) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_not_A_not_N
+ NULL AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ COUNT(
+ DISTINCT UPPER(
+ REGEXP_REPLACE(CAST(`{COL_NAME}` AS STRING), r"[ '\.,-]", "")
+ )
+ ) AS distinct_std_value_ct,
+ SUM(CASE WHEN `{COL_NAME}` = '' THEN 1 ELSE 0 END) AS zero_length_ct,
+ SUM(CASE WHEN `{COL_NAME}` BETWEEN ' !' AND '!' THEN 1 ELSE 0 END) AS lead_space_ct,
+ SUM(
+ CASE
+ WHEN LOWER(CAST(`{COL_NAME}` AS STRING)) LIKE '"%"'
+ OR LOWER(CAST(`{COL_NAME}` AS STRING)) LIKE "'%'" THEN 1
+ ELSE 0
+ END
+ ) AS quoted_value_ct,
+ SUM(
+ CASE
+ WHEN REGEXP_CONTAINS(CAST(`{COL_NAME}` AS STRING), r'.*[0-9].*') THEN 1
+ ELSE 0
+ END
+ ) AS includes_digit_ct,
+ SUM(
+ CASE
+ WHEN REGEXP_CONTAINS(LOWER(CAST(`{COL_NAME}` AS STRING)), r'^(\.{1,}|-{1,}|\?{1,}|\s{1,}|0{2,}|9{2,}|x{2,}|z{2,})$') THEN 1
+ WHEN LOWER(CAST(`{COL_NAME}` AS STRING)) IN ('blank','error','missing','tbd',
+ 'n/a','#na','none','null','unknown') THEN 1
+ WHEN LOWER(CAST(`{COL_NAME}` AS STRING)) IN ('(blank)','(error)','(missing)','(tbd)',
+ '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
+ WHEN LOWER(CAST(`{COL_NAME}` AS STRING)) IN ('[blank]','[error]','[missing]','[tbd]',
+ '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
+ ELSE 0
+ END
+ ) AS filled_value_ct,
+ LEFT(MIN(NULLIF(`{COL_NAME}`, '')), 100) AS min_text,
+ LEFT(MAX(NULLIF(`{COL_NAME}`, '')), 100) AS max_text,
+ SUM(CASE WHEN `{COL_NAME}` = UPPER(`{COL_NAME}`) AND `{COL_NAME}` <> LOWER(`{COL_NAME}`) THEN 1 ELSE 0 END) AS upper_case_ct,
+ SUM(CASE WHEN `{COL_NAME}` = LOWER(`{COL_NAME}`) AND `{COL_NAME}` <> UPPER(`{COL_NAME}`) THEN 1 ELSE 0 END) AS lower_case_ct,
+ SUM(CASE WHEN `{COL_NAME}` = UPPER(`{COL_NAME}`) AND `{COL_NAME}` = LOWER(`{COL_NAME}`) THEN 1 ELSE 0 END) AS non_alpha_ct,
+ COUNTIF(
+ TRANSLATE(
+ CAST(`{COL_NAME}` AS STRING),
+ CODE_POINTS_TO_STRING([160, 8201, 8203, 8204, 8205, 8206, 8207, 8239, 12288, 65279]),
+ REPEAT('X', 10)
+ ) <> CAST(`{COL_NAME}` AS STRING)
+ ) AS non_printing_ct,
+ SUM(<%IS_NUM;LEFT(`{COL_NAME}`, 31)%>) AS numeric_ct,
+ SUM(<%IS_DATE;LEFT(`{COL_NAME}`, 26)%>) AS date_ct,
+ CASE
+ WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^[0-9]{1,5}[a-zA-Z]?\s\w{1,5}\.?\s?\w*\s?\w*\s[a-zA-Z]{1,6}\.?\s?[0-9]{0,5}[A-Z]{0,1}$')
+ THEN 1 END), COUNT(`{COL_NAME}`)) > 0.8 THEN 'STREET_ADDR'
+ WHEN SAFE_DIVIDE(SUM(CASE WHEN `{COL_NAME}` IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
+ THEN 1 END), COUNT(`{COL_NAME}`)) > 0.9 THEN 'STATE_USA'
+        WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^(\+1|1)?[ .-]?(\([2-9][0-9]{2}\)|[2-9][0-9]{2})[ .-]?[2-9][0-9]{2}[ .-]?[0-9]{4}$')
+ THEN 1 END), COUNT(`{COL_NAME}`)) > 0.8 THEN 'PHONE_USA'
+ WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$')
+ AND `{COL_NAME}` NOT LIKE '%://%'
+ THEN 1 END), COUNT(`{COL_NAME}`)) > 0.9 THEN 'EMAIL'
+ WHEN SAFE_DIVIDE(SUM(CASE WHEN TRANSLATE(`{COL_NAME}`, '012345678', '999999999') IN ('99999', '999999999', '99999-9999')
+ THEN 1 END), COUNT(`{COL_NAME}`)) > 0.9 THEN 'ZIP_USA'
+ WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^[\w\s\-]+\.(txt|csv|tsv|dat|doc|pdf|xlsx)$')
+ THEN 1 END), COUNT(`{COL_NAME}`)) > 0.9 THEN 'FILE_NAME'
+ WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^([0-9]{4}[- ]?){3}[0-9]{4}$')
+ THEN 1 END), COUNT(`{COL_NAME}`)) > 0.8 THEN 'CREDIT_CARD'
+ WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$')
+ AND NOT REGEXP_CONTAINS(`{COL_NAME}`, r'\s(and|but|or|yet)\s')
+ THEN 1 END), COUNT(`{COL_NAME}`)) > 0.8 THEN 'DELIMITED_DATA'
+ WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$')
+ AND CAST(SUBSTR(`{COL_NAME}`, 1, 3) AS INT64) NOT BETWEEN 734 AND 749
+ AND SUBSTR(`{COL_NAME}`, 1, 3) <> '666'
+ THEN 1 END), COUNT(`{COL_NAME}`)) > 0.9 THEN 'SSN'
+ END AS std_pattern_match,
+-- TG-ELSE
+ NULL AS distinct_std_value_ct,
+ NULL AS zero_length_ct,
+ NULL AS lead_space_ct,
+ NULL AS quoted_value_ct,
+ NULL AS includes_digit_ct,
+ NULL AS filled_value_ct,
+ NULL AS min_text,
+ NULL AS max_text,
+ NULL AS upper_case_ct,
+ NULL AS lower_case_ct,
+ NULL AS non_alpha_ct,
+ NULL AS non_printing_ct,
+ NULL AS numeric_ct,
+ NULL AS date_ct,
+ NULL AS std_pattern_match,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (
+ SELECT LEFT(STRING_AGG(val, ' | ' ORDER BY ct DESC), 1000) AS top_patterns
+ FROM (
+ SELECT CONCAT(CAST(ct AS STRING), ' | ', pattern) AS val,
+ ct
+ FROM (
+ SELECT pattern,
+ COUNT(*) AS ct
+ FROM (
+ SELECT REGEXP_REPLACE(
+ REGEXP_REPLACE(
+ REGEXP_REPLACE(CAST({COL_NAME} AS STRING), r'[a-z]', 'a'),
+ r'[A-Z]', 'A'),
+ r'[0-9]', 'N') AS pattern
+ FROM `target_table`
+ WHERE {COL_NAME} > ' '
+ AND (
+ SELECT MAX(LENGTH(CAST({COL_NAME} AS STRING)))
+ FROM `target_table`
+ ) BETWEEN 3 AND {MAX_PATTERN_LENGTH}
+ ) p
+ GROUP BY pattern
+ HAVING pattern > ' '
+ ORDER BY ct DESC
+ LIMIT 5
+ )
+ ) ps
+ ) AS top_patterns,
+-- TG-ELSE
+ NULL AS top_patterns,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ MIN(`{COL_NAME}`) AS min_value,
+ MIN(CASE WHEN `{COL_NAME}` > 0 THEN `{COL_NAME}` ELSE NULL END) AS min_value_over_0,
+ MAX(`{COL_NAME}`) AS max_value,
+ AVG(CAST(`{COL_NAME}` AS FLOAT64)) AS avg_value,
+ STDDEV(CAST(`{COL_NAME}` AS FLOAT64)) AS stdev_value,
+ MIN(pct_25) AS percentile_25,
+ MIN(pct_50) AS percentile_50,
+ MIN(pct_75) AS percentile_75,
+-- TG-ELSE
+ NULL AS min_value,
+ NULL AS min_value_over_0,
+ NULL AS max_value,
+ NULL AS avg_value,
+ NULL AS stdev_value,
+ NULL AS percentile_25,
+ NULL AS percentile_50,
+ NULL AS percentile_75,
+-- TG-ENDIF
+-- TG-IF is_N_decimal
+ SUM(COALESCE(ROUND(ABS(MOD(`{COL_NAME}`, 1)), 5), 0)) AS fractional_sum,
+-- TG-ELSE
+ NULL AS fractional_sum,
+-- TG-ENDIF
+-- TG-IF is_type_D
+ MIN(`{COL_NAME}`) AS min_date, -- Other flavors have a minimum threshold of 0001-01-01, but BigQuery doesn't make it easy to do the same
+ MAX(`{COL_NAME}`) AS max_date,
+ COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), MONTH) > 12 THEN 1 END) AS before_1yr_date_ct,
+ COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), MONTH) > 60 THEN 1 END) AS before_5yr_date_ct,
+ COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), MONTH) > 240 THEN 1 END) AS before_20yr_date_ct,
+ COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), MONTH) > 1200 THEN 1 END) AS before_100yr_date_ct,
+ COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), DAY) BETWEEN 0 AND 365 THEN 1 END) AS within_1yr_date_ct,
+ COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), DAY) BETWEEN 0 AND 30 THEN 1 END) AS within_1mo_date_ct,
+ COUNT(CASE WHEN SAFE_CAST(DATE(`{COL_NAME}`) AS DATE) > SAFE_CAST(DATE('{RUN_DATE}') AS DATE) THEN 1 END) AS future_date_ct,
+ COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), SAFE_CAST(DATE('{RUN_DATE}') AS DATE), MONTH) > 240 THEN 1 END) AS distant_future_date_ct,
+ COUNT(DISTINCT DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), DAY)) AS date_days_present,
+ COUNT(DISTINCT DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), ISOWEEK)) AS date_weeks_present,
+ COUNT(DISTINCT DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), MONTH)) AS date_months_present,
+-- TG-ELSE
+ NULL AS min_date,
+ NULL AS max_date,
+ NULL AS before_1yr_date_ct,
+ NULL AS before_5yr_date_ct,
+ NULL AS before_20yr_date_ct,
+ NULL AS before_100yr_date_ct,
+ NULL AS within_1yr_date_ct,
+ NULL AS within_1mo_date_ct,
+ NULL AS future_date_ct,
+ NULL AS distant_future_date_ct,
+ NULL AS date_days_present,
+ NULL AS date_weeks_present,
+ NULL AS date_months_present,
+-- TG-ENDIF
+-- TG-IF is_type_B
+ SUM(CAST(`{COL_NAME}` AS INT64)) AS boolean_true_ct,
+-- TG-ELSE
+ NULL AS boolean_true_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (
+ SELECT
+ COUNT(DISTINCT REGEXP_REPLACE(
+ REGEXP_REPLACE(
+ REGEXP_REPLACE(CAST(`{COL_NAME}` AS STRING), r'[a-z]', 'a'),
+ r'[A-Z]', 'A'
+ ),
+ r'[0-9]', 'N'
+ )) AS pattern_ct
+ FROM `target_table`
+ WHERE `{COL_NAME}` > ' '
+ ) AS distinct_pattern_ct,
+ SUM(CAST(SIGN(LENGTH(TRIM(`{COL_NAME}`)) - LENGTH(REPLACE(TRIM(`{COL_NAME}`), ' ', ''))) AS INT64)) AS embedded_space_ct,
+ AVG(CAST(LENGTH(TRIM(`{COL_NAME}`)) - LENGTH(REPLACE(TRIM(`{COL_NAME}`), ' ', '')) AS FLOAT64)) AS avg_embedded_spaces,
+-- TG-ELSE
+ NULL AS distinct_pattern_ct,
+ NULL AS embedded_space_ct,
+ NULL AS avg_embedded_spaces,
+-- TG-ENDIF
+ '{PROFILE_RUN_ID}' AS profile_run_id
+ FROM target_table
+-- TG-IF is_N_sampling
+ ,
+ (SELECT
+ APPROX_QUANTILES(`{COL_NAME}`, 100)[OFFSET(25)] AS pct_25,
+ APPROX_QUANTILES(`{COL_NAME}`, 100)[OFFSET(50)] AS pct_50,
+ APPROX_QUANTILES(`{COL_NAME}`, 100)[OFFSET(75)] AS pct_75
+ FROM `{DATA_SCHEMA}.{DATA_TABLE}` LIMIT 1) pctile
+-- TG-ENDIF
+-- TG-IF is_N_no_sampling
+ ,
+ (SELECT
+ PERCENTILE_CONT(`{COL_NAME}`, 0.25) OVER() AS pct_25,
+ PERCENTILE_CONT(`{COL_NAME}`, 0.50) OVER() AS pct_50,
+ PERCENTILE_CONT(`{COL_NAME}`, 0.75) OVER() AS pct_75
+ FROM `{DATA_SCHEMA}.{DATA_TABLE}` LIMIT 1) pctile
+-- TG-ENDIF
diff --git a/testgen/template/flavors/bigquery/profiling/project_profiling_query.yaml b/testgen/template/flavors/bigquery/profiling/project_profiling_query.yaml
deleted file mode 100644
index 0a9c6350..00000000
--- a/testgen/template/flavors/bigquery/profiling/project_profiling_query.yaml
+++ /dev/null
@@ -1,257 +0,0 @@
----
-01_sampling: |
- WITH target_table AS (
- SELECT * FROM `{DATA_SCHEMA}.{DATA_TABLE}` WHERE RAND() * 100 < {SAMPLE_PERCENT_CALC}
- )
- SELECT
-01_else: |
- WITH target_table AS (
- SELECT * FROM `{DATA_SCHEMA}.{DATA_TABLE}`
- )
- SELECT
-01_all: |
- {CONNECTION_ID} as connection_id,
- '{PROJECT_CODE}' as project_code,
- '{TABLE_GROUPS_ID}' as table_groups_id,
- '{DATA_SCHEMA}' AS schema_name,
- '{RUN_DATE}' AS run_date,
- '{DATA_TABLE}' AS table_name,
- {COL_POS} AS position,
- '{COL_NAME_SANITIZED}' AS column_name,
- '{COL_TYPE}' AS column_type,
- '{DB_DATA_TYPE}' AS db_data_type,
- '{COL_GEN_TYPE}' AS general_type,
- COUNT(*) AS record_ct,
-
-02_X: |
- COUNT(`{COL_NAME}`) AS value_ct,
- COUNT(DISTINCT `{COL_NAME}`) AS distinct_value_ct,
- SUM(IF(`{COL_NAME}` IS NULL, 1, 0)) AS null_value_ct,
-02_else: |
- COUNT(`{COL_NAME}`) AS value_ct,
- COUNT(DISTINCT `{COL_NAME}`) AS distinct_value_ct,
- SUM(IF(`{COL_NAME}` IS NULL, 1, 0)) AS null_value_ct,
-
-03_ADN: MIN(LENGTH(CAST(`{COL_NAME}` AS STRING))) AS min_length,
- MAX(LENGTH(CAST(`{COL_NAME}` AS STRING))) AS max_length,
- AVG(NULLIF(LENGTH(CAST(`{COL_NAME}` AS STRING)), 0)) AS avg_length,
-03_else: NULL as min_length,
- NULL as max_length,
- NULL as avg_length,
-
-04_A: SUM(
- CASE
- WHEN REGEXP_CONTAINS(TRIM(CAST(`{COL_NAME}` AS STRING)), r'^0(\.0*)?$') THEN 1
- ELSE 0
- END
- ) AS zero_value_ct,
-04_N: CAST(SUM(1 - ABS(SIGN(CAST(`{COL_NAME}` AS NUMERIC)))) AS INT64) AS zero_value_ct,
-04_else: NULL as zero_value_ct,
-
-05_A: |
- COUNT(
- DISTINCT UPPER(
- REGEXP_REPLACE(CAST(`{COL_NAME}` AS STRING), r"[ '\.,-]", "")
- )
- ) as distinct_std_value_ct,
- SUM(CASE WHEN `{COL_NAME}` = '' THEN 1 ELSE 0 END) AS zero_length_ct,
- SUM(CASE WHEN `{COL_NAME}` BETWEEN ' !' AND '!' THEN 1 ELSE 0 END) AS lead_space_ct,
- SUM(
- CASE
- WHEN LOWER(CAST(`{COL_NAME}` AS STRING)) LIKE '"%"'
- OR LOWER(CAST(`{COL_NAME}` AS STRING)) LIKE "'%'" THEN 1
- ELSE 0
- END
- ) AS quoted_value_ct,
- SUM(
- CASE
- WHEN REGEXP_CONTAINS(CAST(`{COL_NAME}` AS STRING), r'.*[0-9].*') THEN 1
- ELSE 0
- END
- ) AS includes_digit_ct,
- SUM(
- CASE
- WHEN CAST(`{COL_NAME}` AS STRING) IN ('.', '?', ' ') THEN 1
- WHEN REGEXP_CONTAINS(LOWER(CAST(`{COL_NAME}` AS STRING)), r'^\s*[-09xz]{2,}\s*$') THEN 1
- WHEN LOWER(CAST(`{COL_NAME}` AS STRING)) IN ('blank','error','missing','tbd',
- 'n/a','#na','none','null','unknown') THEN 1
- WHEN LOWER(CAST(`{COL_NAME}` AS STRING)) IN ('(blank)','(error)','(missing)','(tbd)',
- '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
- WHEN LOWER(CAST(`{COL_NAME}` AS STRING)) IN ('[blank]','[error]','[missing]','[tbd]',
- '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
- ELSE 0
- END
- ) AS filled_value_ct,
- LEFT(MIN(NULLIF(`{COL_NAME}`, '')), 100) AS min_text,
- LEFT(MAX(NULLIF(`{COL_NAME}`, '')), 100) AS max_text,
- SUM( CASE WHEN `{COL_NAME}` = UPPER(`{COL_NAME}`) AND `{COL_NAME}` <> LOWER(`{COL_NAME}`) THEN 1 ELSE 0 END) AS upper_case_ct,
- SUM( CASE WHEN `{COL_NAME}` = LOWER(`{COL_NAME}`) AND `{COL_NAME}` <> UPPER(`{COL_NAME}`) THEN 1 ELSE 0 END) AS lower_case_ct,
- SUM( CASE WHEN `{COL_NAME}` = UPPER(`{COL_NAME}`) AND `{COL_NAME}` = LOWER(`{COL_NAME}`) THEN 1 ELSE 0 END) AS non_alpha_ct,
- COUNTIF(
- TRANSLATE(
- CAST(`{COL_NAME}` AS STRING),
- CODE_POINTS_TO_STRING([160, 8201, 8203, 8204, 8205, 8206, 8207, 8239, 12288, 65279]),
- REPEAT('X', 10)
- ) <> CAST(`{COL_NAME}` AS STRING)
- ) as non_printing_ct,
- SUM(<%IS_NUM;LEFT(`{COL_NAME}`, 31)%>) AS numeric_ct,
- SUM(<%IS_DATE;LEFT(`{COL_NAME}`, 26)%>) AS date_ct,
- CASE
- WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^[0-9]{1,5}[a-zA-Z]?\s\w{1,5}\.?\s?\w*\s?\w*\s[a-zA-Z]{1,6}\.?\s?[0-9]{0,5}[A-Z]{0,1}$')
- THEN 1 END), COUNT(`{COL_NAME}`)) > 0.8 THEN 'STREET_ADDR'
- WHEN SAFE_DIVIDE(SUM(CASE WHEN `{COL_NAME}` IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
- THEN 1 END), COUNT(`{COL_NAME}`)) > 0.9 THEN 'STATE_USA'
- WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^(\\+1|1)?[ .-]?(\\([2-9][0-9]{2}\\)|[2-9][0-9]{2})[ .-]?[2-9][0-9]{2}[ .-]?[0-9]{4}$')
- THEN 1 END), COUNT(`{COL_NAME}`)) > 0.8 THEN 'PHONE_USA'
- WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$')
- THEN 1 END), COUNT(`{COL_NAME}`)) > 0.9 THEN 'EMAIL'
- WHEN SAFE_DIVIDE(SUM(CASE WHEN TRANSLATE(`{COL_NAME}`, '012345678', '999999999') IN ('99999', '999999999', '99999-9999')
- THEN 1 END), COUNT(`{COL_NAME}`)) > 0.9 THEN 'ZIP_USA'
- WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^[\w\s\-]+\.(txt|csv|tsv|dat|doc|pdf|xlsx)$')
- THEN 1 END), COUNT(`{COL_NAME}`)) > 0.9 THEN 'FILE_NAME'
- WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^([0-9]{4}[- ]){3}[0-9]{4}$')
- THEN 1 END), COUNT(`{COL_NAME}`)) > 0.8 THEN 'CREDIT_CARD'
- WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$')
- AND NOT REGEXP_CONTAINS(`{COL_NAME}`, r'\s(and|but|or|yet)\s')
- THEN 1 END), COUNT(`{COL_NAME}`)) > 0.8 THEN 'DELIMITED_DATA'
- WHEN SAFE_DIVIDE(SUM(CASE WHEN REGEXP_CONTAINS(`{COL_NAME}`, r'^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$')
- AND CAST(SUBSTR(`{COL_NAME}`, 1, 3) AS INT64) NOT BETWEEN 734 AND 749
- AND SUBSTR(`{COL_NAME}`, 1, 3) <> '666'
- THEN 1 END), COUNT(`{COL_NAME}`)) > 0.9 THEN 'SSN'
- END AS std_pattern_match,
-05_else: NULL as distinct_std_value_ct,
- NULL as zero_length_ct,
- NULL as lead_space_ct,
- NULL as quoted_value_ct,
- NULL as includes_digit_ct,
- NULL as filled_value_ct,
- NULL as min_text,
- NULL as max_text,
- NULL as upper_case_ct,
- NULL as lower_case_ct,
- NULL as non_alpha_ct,
- NULL as non_printing_ct,
- NULL as numeric_ct,
- NULL as date_ct,
- NULL as std_pattern_match,
-
-06_A: |
- (
- SELECT LEFT(STRING_AGG(val, ' | ' ORDER BY ct DESC), 1000) AS top_patterns
- FROM (
- SELECT CONCAT(CAST(ct AS STRING), ' | ', pattern) AS val,
- ct
- FROM (
- SELECT pattern,
- COUNT(*) AS ct
- FROM (
- SELECT REGEXP_REPLACE(
- REGEXP_REPLACE(
- REGEXP_REPLACE(CAST({COL_NAME} AS STRING), r'[a-z]', 'a'),
- r'[A-Z]', 'A'),
- r'[0-9]', 'N') AS pattern
- FROM `target_table`
- WHERE {COL_NAME} > ' '
- AND (
- SELECT MAX(LENGTH(CAST({COL_NAME} AS STRING)))
- FROM `target_table`
- ) BETWEEN 3 AND {MAX_PATTERN_LENGTH}
- ) p
- GROUP BY pattern
- HAVING pattern > ' '
- ORDER BY ct DESC
- LIMIT 5
- )
- ) ps
- ) as top_patterns,
-06_else: NULL as top_patterns,
-
-08_N: MIN(`{COL_NAME}`) AS min_value,
- MIN(CASE WHEN `{COL_NAME}` > 0 THEN `{COL_NAME}` ELSE NULL END) AS min_value_over_0,
- MAX(`{COL_NAME}`) AS max_value,
- AVG(CAST(`{COL_NAME}` AS FLOAT64)) AS avg_value,
- STDDEV(CAST(`{COL_NAME}` AS FLOAT64)) AS stdev_value,
- MIN(pct_25) AS percentile_25,
- MIN(pct_50) AS percentile_50,
- MIN(pct_75) AS percentile_75,
-08_else: NULL as min_value,
- NULL as min_value_over_0,
- NULL as max_value,
- NULL as avg_value,
- NULL as stdev_value,
- NULL as percentile_25,
- NULL as percentile_50,
- NULL as percentile_75,
-
-10_N_dec: SUM(COALESCE(ROUND(ABS(MOD(`{COL_NAME}`, 1)), 5), 0)) as fractional_sum,
-10_else: NULL as fractional_sum,
-
-11_D: |
- MIN(`{COL_NAME}`) AS min_date, -- Other flavors have a minimum threshold of 0001-01-01, but BigQuery doesn't make it easy to to the same
- MAX(`{COL_NAME}`) as max_date,
- COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), MONTH) > 12 THEN 1 END) AS before_1yr_date_ct,
- COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), MONTH) > 60 THEN 1 END) AS before_5yr_date_ct,
- COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), MONTH) > 240 THEN 1 END) AS before_20yr_date_ct,
- COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), MONTH) > 1200 THEN 1 END) AS before_100yr_date_ct,
- COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), DAY) BETWEEN 0 AND 365 THEN 1 END) AS within_1yr_date_ct,
- COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), DAY) BETWEEN 0 AND 30 THEN 1 END) AS within_1mo_date_ct,
- COUNT(CASE WHEN SAFE_CAST(DATE(`{COL_NAME}`) AS DATE) > SAFE_CAST(DATE('{RUN_DATE}') AS DATE) THEN 1 END) AS future_date_ct,
- COUNT(CASE WHEN DATE_DIFF(SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), SAFE_CAST(DATE('{RUN_DATE}') AS DATE), MONTH) > 240 THEN 1 END) AS distant_future_date_ct,
- COUNT(DISTINCT DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), DAY)) AS date_days_present,
- COUNT(DISTINCT DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), WEEK)) AS date_weeks_present,
- COUNT(DISTINCT DATE_DIFF(SAFE_CAST(DATE('{RUN_DATE}') AS DATE), SAFE_CAST(DATE(`{COL_NAME}`) AS DATE), MONTH)) AS date_months_present,
-11_else: NULL as min_date,
- NULL as max_date,
- NULL as before_1yr_date_ct,
- NULL as before_5yr_date_ct,
- NULL as before_20yr_date_ct,
- NULL AS before_100yr_date_ct,
- NULL as within_1yr_date_ct,
- NULL as within_1mo_date_ct,
- NULL as future_date_ct,
- NULL as distant_future_date_ct,
- NULL as date_days_present,
- NULL as date_weeks_present,
- NULL as date_months_present,
-
-12_B: SUM(CAST(`{COL_NAME}` AS INT64)) AS boolean_true_ct,
-12_else: NULL as boolean_true_ct,
-
-14_A: |
- (
- SELECT
- COUNT(DISTINCT REGEXP_REPLACE(
- REGEXP_REPLACE(
- REGEXP_REPLACE(CAST(`{COL_NAME}` AS STRING), r'[a-z]', 'a'),
- r'[A-Z]', 'A'
- ),
- r'[0-9]', 'N'
- )) AS pattern_ct
- FROM `target_table`
- WHERE `{COL_NAME}` > ' '
- ) as distinct_pattern_ct,
- SUM(CAST(SIGN(LENGTH(TRIM(`{COL_NAME}`)) - LENGTH(REPLACE(TRIM(`{COL_NAME}`), ' ', ''))) AS INT64)) AS embedded_space_ct,
- AVG(CAST(LENGTH(TRIM(`{COL_NAME}`)) - LENGTH(REPLACE(TRIM(`{COL_NAME}`), ' ', '')) AS FLOAT64)) AS avg_embedded_spaces,
-14_else: NULL as distinct_pattern_ct,
- NULL as embedded_space_ct,
- NULL as avg_embedded_spaces,
-
-16_all: " '{PROFILE_RUN_ID}' as profile_run_id "
-
-98_all: ' FROM target_table'
-
-99_N: |
- ,
- (SELECT
- PERCENTILE_CONT(`{COL_NAME}`, 0.25) OVER() AS pct_25,
- PERCENTILE_CONT(`{COL_NAME}`, 0.50) OVER() AS pct_50,
- PERCENTILE_CONT(`{COL_NAME}`, 0.75) OVER() AS pct_75
- FROM `{DATA_SCHEMA}.{DATA_TABLE}` LIMIT 1) pctile
-99_N_sampling: |
- ,
- (SELECT
- APPROX_QUANTILES(`{COL_NAME}`, 100)[OFFSET(25)] AS pct_25,
- APPROX_QUANTILES(`{COL_NAME}`, 100)[OFFSET(50)] AS pct_50,
- APPROX_QUANTILES(`{COL_NAME}`, 100)[OFFSET(75)] AS pct_75
- FROM `{DATA_SCHEMA}.{DATA_TABLE}` LIMIT 1) pctile
-99_else: ;
diff --git a/testgen/template/flavors/databricks/profiling/project_profiling_query.yaml b/testgen/template/flavors/databricks/profiling/project_profiling_query.sql
similarity index 65%
rename from testgen/template/flavors/databricks/profiling/project_profiling_query.yaml
rename to testgen/template/flavors/databricks/profiling/project_profiling_query.sql
index 2fc9350d..0ffe73cc 100644
--- a/testgen/template/flavors/databricks/profiling/project_profiling_query.yaml
+++ b/testgen/template/flavors/databricks/profiling/project_profiling_query.sql
@@ -1,18 +1,14 @@
----
-01_sampling: |
- WITH target_table AS (
- SELECT * FROM `{DATA_SCHEMA}`.`{DATA_TABLE}` TABLESAMPLE ({SAMPLE_PERCENT_CALC} PERCENT)
- )
- SELECT
-01_else: |
- WITH target_table AS (
- SELECT * FROM `{DATA_SCHEMA}`.`{DATA_TABLE}`
- )
- SELECT
-01_all: |
- {CONNECTION_ID} as connection_id,
- '{PROJECT_CODE}' as project_code,
- '{TABLE_GROUPS_ID}' as table_groups_id,
+WITH target_table AS (
+-- TG-IF do_sample
+ SELECT * FROM `{DATA_SCHEMA}`.`{DATA_TABLE}` TABLESAMPLE ({SAMPLE_PERCENT_CALC} PERCENT)
+-- TG-ELSE
+ SELECT * FROM `{DATA_SCHEMA}`.`{DATA_TABLE}`
+-- TG-ENDIF
+)
+SELECT
+ {CONNECTION_ID} AS connection_id,
+ '{PROJECT_CODE}' AS project_code,
+ '{TABLE_GROUPS_ID}' AS table_groups_id,
'{DATA_SCHEMA}' AS schema_name,
'{RUN_DATE}' AS run_date,
'{DATA_TABLE}' AS table_name,
@@ -22,49 +18,54 @@
'{DB_DATA_TYPE}' AS db_data_type,
'{COL_GEN_TYPE}' AS general_type,
COUNT(*) AS record_ct,
-
-02_X: |
COUNT(`{COL_NAME}`) AS value_ct,
COUNT(DISTINCT `{COL_NAME}`) AS distinct_value_ct,
SUM(CASE WHEN `{COL_NAME}` IS NULL THEN 1 ELSE 0 END) AS null_value_ct,
-02_else: |
- COUNT(`{COL_NAME}`) AS value_ct,
- COUNT(DISTINCT `{COL_NAME}`) AS distinct_value_ct,
- SUM(CASE WHEN `{COL_NAME}` IS NULL THEN 1 ELSE 0 END) AS null_value_ct,
-
-03_ADN: MIN(LEN(`{COL_NAME}`)) AS min_length,
- MAX(LEN(`{COL_NAME}`)) AS max_length,
+-- TG-IF is_type_ADN
+ MIN(LEN(`{COL_NAME}`)) AS min_length,
+ MAX(LEN(`{COL_NAME}`)) AS max_length,
AVG(CAST(NULLIF(LEN(`{COL_NAME}`), 0) AS FLOAT)) AS avg_length,
-03_else: NULL as min_length,
- NULL as max_length,
- NULL as avg_length,
-
-04_A: SUM(CASE
+-- TG-ELSE
+ NULL AS min_length,
+ NULL AS max_length,
+ NULL AS avg_length,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ SUM(CASE
WHEN LTRIM(RTRIM(`{COL_NAME}`)) RLIKE '0([.]0*)' THEN 1 ELSE 0
- END) AS zero_value_ct,
-04_N: CAST(SUM( 1 - ABS(SIGN(`{COL_NAME}`)))AS BIGINT ) AS zero_value_ct,
-04_else: NULL as zero_value_ct,
-
-05_A: COUNT(DISTINCT UPPER(REPLACE(TRANSLATE(`{COL_NAME}`,' '''',.-',REPEAT(' ', LEN(' '''',.-'))),' ',''))) as distinct_std_value_ct,
+ END) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ CAST(SUM( 1 - ABS(SIGN(`{COL_NAME}`)))AS BIGINT ) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_not_A_not_N
+ NULL AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ COUNT(DISTINCT UPPER(REPLACE(TRANSLATE(`{COL_NAME}`,' '''',.-',REPEAT(' ', LEN(' '''',.-'))),' ',''))) AS distinct_std_value_ct,
SUM(CASE
WHEN `{COL_NAME}` = '' THEN 1
- ELSE 0
- END) AS zero_length_ct,
- SUM( CASE
- WHEN `{COL_NAME}` BETWEEN ' !' AND '!' THEN 1
- ELSE 0
- END ) AS lead_space_ct,
- SUM( CASE WHEN `{COL_NAME}` LIKE '"%"' OR `{COL_NAME}` LIKE '\'%\'' THEN 1 ELSE 0 END ) as quoted_value_ct,
- SUM( CASE WHEN `{COL_NAME}` RLIKE '[0-9]' THEN 1 ELSE 0 END ) as includes_digit_ct,
- SUM( CASE
- WHEN `{COL_NAME}` IN ('.', '?') OR `{COL_NAME}` RLIKE '^\s+$' THEN 1
+ ELSE 0
+ END) AS zero_length_ct,
+ SUM(CASE
+ WHEN `{COL_NAME}` BETWEEN ' !' AND '!' THEN 1
+ ELSE 0
+ END) AS lead_space_ct,
+ SUM(CASE WHEN `{COL_NAME}` LIKE '"%"' OR `{COL_NAME}` LIKE '\'%\'' THEN 1 ELSE 0 END) AS quoted_value_ct,
+ SUM(CASE WHEN `{COL_NAME}` RLIKE '[0-9]' THEN 1 ELSE 0 END) AS includes_digit_ct,
+ SUM(CASE
+ WHEN LEN(`{COL_NAME}`) > 0
+ AND ((LEN(REPLACE(`{COL_NAME}`, '.', ''))= 0 )
+ OR (LEN(REPLACE(`{COL_NAME}`, '-', ''))= 0 )
+ OR (LEN(REPLACE(`{COL_NAME}`, '?', ''))= 0 )
+ OR (LEN(REPLACE(`{COL_NAME}`, ' ', ''))= 0 )
+ ) THEN 1
WHEN LEN(`{COL_NAME}`) > 1
- AND ( LOWER(`{COL_NAME}`) LIKE '%..%' OR LOWER(`{COL_NAME}`) RLIKE '--'
- OR (LEN(REPLACE(`{COL_NAME}`, '0', ''))= 0 )
+ AND ((LEN(REPLACE(`{COL_NAME}`, '0', ''))= 0 )
OR (LEN(REPLACE(`{COL_NAME}`, '9', ''))= 0 )
OR (LEN(REPLACE(LOWER(`{COL_NAME}`), 'x', ''))= 0 )
OR (LEN(REPLACE(LOWER(`{COL_NAME}`), 'z', ''))= 0 )
- ) THEN 1
+ ) THEN 1
WHEN LOWER(`{COL_NAME}`) IN ('blank','error','missing','tbd',
'n/a','#na','none','null','unknown') THEN 1
WHEN LOWER(`{COL_NAME}`) IN ('(blank)','(error)','(missing)','(tbd)',
@@ -72,9 +73,9 @@
WHEN LOWER(`{COL_NAME}`) IN ('[blank]','[error]','[missing]','[tbd]',
'[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
ELSE 0
- END ) AS filled_value_ct,
- LEFT(MIN(NULLIF(`{COL_NAME}`, '')), 100) AS min_text,
- LEFT(MAX(NULLIF(`{COL_NAME}`, '')), 100) AS max_text,
+ END) AS filled_value_ct,
+ LEFT(MIN(NULLIF(`{COL_NAME}`, '')), 100) AS min_text,
+ LEFT(MAX(NULLIF(`{COL_NAME}`, '')), 100) AS max_text,
SUM(CASE
WHEN TRANSLATE(`{COL_NAME}`, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = `{COL_NAME}` THEN 0
WHEN TRANSLATE(`{COL_NAME}`, 'abcdefghijklmnopqrstuvwxyz', ' ') = `{COL_NAME}` THEN 1
@@ -89,17 +90,18 @@
WHEN TRANSLATE(`{COL_NAME}`, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = `{COL_NAME}` THEN 1
ELSE 0
END) AS non_alpha_ct,
- COUNT( CASE WHEN TRANSLATE(`{COL_NAME}`, '\u00a0\u2009\u200b\u200c\u200d\u200e\u200f\u202f\u3000\ufeff', 'XXXXXXXXXX') <> `{COL_NAME}` THEN 1 END) as non_printing_ct,
- SUM(<%IS_NUM;LEFT(`{COL_NAME}`, 31)%>) AS numeric_ct,
- SUM(<%IS_DATE;LEFT(`{COL_NAME}`, 26)%>) AS date_ct,
+ COUNT(CASE WHEN TRANSLATE(`{COL_NAME}`, '\u00a0\u2009\u200b\u200c\u200d\u200e\u200f\u202f\u3000\ufeff', 'XXXXXXXXXX') <> `{COL_NAME}` THEN 1 END) AS non_printing_ct,
+ SUM(<%IS_NUM;LEFT(`{COL_NAME}`, 31)%>) AS numeric_ct,
+ SUM(<%IS_DATE;LEFT(`{COL_NAME}`, 26)%>) AS date_ct,
CASE
WHEN CAST(SUM( CASE WHEN UPPER(`{COL_NAME}`) RLIKE '[1-9]{1,5} [A-Z]+ .*'
- THEN 1 END ) as FLOAT) /CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.8 THEN 'STREET_ADDR'
+ THEN 1 END ) AS FLOAT) /CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.8 THEN 'STREET_ADDR'
WHEN CAST(SUM(CASE WHEN `{COL_NAME}` IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.9 THEN 'STATE_USA'
WHEN CAST(SUM( CASE WHEN `{COL_NAME}` RLIKE '\\+1\\s*\\(?\\d{3}\\)?[-. ]*\\d{3}[-. ]*\\d{4}'
THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.9 THEN 'PHONE_USA'
- WHEN CAST(SUM( CASE WHEN `{COL_NAME}` RLIKE '[_a-zA-Z0-9.-]+@[a-zA-Z0-9.-]+.[a-zA-Z][a-zA-Z]+'
+ WHEN CAST(SUM( CASE WHEN `{COL_NAME}` RLIKE '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$'
+ AND `{COL_NAME}` NOT LIKE '%://%'
THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.9 THEN 'EMAIL'
WHEN CAST(SUM( CASE WHEN TRANSLATE(`{COL_NAME}`,'012345678','999999999') IN ('99999', '999999999', '99999-9999')
THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.9 THEN 'ZIP_USA'
@@ -113,7 +115,7 @@
OR `{COL_NAME}` LIKE '%.pdf'
OR `{COL_NAME}` LIKE '%.xlsx')
THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.9 THEN 'FILE_NAME'
- WHEN CAST(SUM( CASE WHEN `{COL_NAME}` RLIKE '[0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9]'
+ WHEN CAST(SUM( CASE WHEN `{COL_NAME}` RLIKE '^([0-9]{4}[- ]?){3}[0-9]{4}$'
THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.8 THEN 'CREDIT_CARD'
WHEN CAST(SUM( CASE WHEN ( `{COL_NAME}` LIKE '%,%,%,%'
OR `{COL_NAME}` LIKE '%|%|%|%'
@@ -123,30 +125,32 @@
OR `{COL_NAME}` LIKE '% but %'
OR `{COL_NAME}` LIKE '% or %'
OR `{COL_NAME}` LIKE '% yet %' )
- AND COALESCE(CAST(LEN(`{COL_NAME}`) - LEN(REPLACE(`{COL_NAME}`, ',', '')) as FLOAT)
- / CAST(NULLIF(LEN(`{COL_NAME}`) - LEN(REPLACE(`{COL_NAME}`, ' ', '')), 0) as FLOAT), 1) > 0.6
+ AND COALESCE(CAST(LEN(`{COL_NAME}`) - LEN(REPLACE(`{COL_NAME}`, ',', '')) AS FLOAT)
+ / CAST(NULLIF(LEN(`{COL_NAME}`) - LEN(REPLACE(`{COL_NAME}`, ' ', '')), 0) AS FLOAT), 1) > 0.6
THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.8 THEN 'DELIMITED_DATA'
WHEN CAST(SUM ( CASE WHEN `{COL_NAME}` RLIKE '[0-8][0-9][0-9][- ][0-9][0-9][- ][0-9][0-9][0-9][0-9]'
AND LEFT(`{COL_NAME}`, 3) NOT BETWEEN '734' AND '749'
AND LEFT(`{COL_NAME}`, 3) <> '666' THEN 1 END) AS FLOAT)/CAST(COUNT(`{COL_NAME}`) AS FLOAT) > 0.9 THEN 'SSN'
- END as std_pattern_match,
-05_else: NULL as distinct_std_value_ct,
- NULL as zero_length_ct,
- NULL as lead_space_ct,
- NULL as quoted_value_ct,
- NULL as includes_digit_ct,
- NULL as filled_value_ct,
- NULL as min_text,
- NULL as max_text,
- NULL as upper_case_ct,
- NULL as lower_case_ct,
- NULL as non_alpha_ct,
- NULL as non_printing_ct,
- NULL as numeric_ct,
- NULL as date_ct,
- NULL as std_pattern_match,
-
-06_A: (SELECT CONCAT_WS(' | ', collect_list(ct_pattern))
+ END AS std_pattern_match,
+-- TG-ELSE
+ NULL AS distinct_std_value_ct,
+ NULL AS zero_length_ct,
+ NULL AS lead_space_ct,
+ NULL AS quoted_value_ct,
+ NULL AS includes_digit_ct,
+ NULL AS filled_value_ct,
+ NULL AS min_text,
+ NULL AS max_text,
+ NULL AS upper_case_ct,
+ NULL AS lower_case_ct,
+ NULL AS non_alpha_ct,
+ NULL AS non_printing_ct,
+ NULL AS numeric_ct,
+ NULL AS date_ct,
+ NULL AS std_pattern_match,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT CONCAT_WS(' | ', collect_list(ct_pattern))
FROM (
SELECT
TRANSLATE(
@@ -165,33 +169,39 @@
ORDER BY ct DESC
LIMIT 5
)) AS top_patterns,
-06_else: NULL as top_patterns,
-
-08_N: MIN(`{COL_NAME}`) AS min_value,
- MIN(CASE WHEN `{COL_NAME}` > 0 THEN `{COL_NAME}` ELSE NULL END) AS min_value_over_0,
- MAX(`{COL_NAME}`) AS max_value,
+-- TG-ELSE
+ NULL AS top_patterns,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ MIN(`{COL_NAME}`) AS min_value,
+ MIN(CASE WHEN `{COL_NAME}` > 0 THEN `{COL_NAME}` ELSE NULL END) AS min_value_over_0,
+ MAX(`{COL_NAME}`) AS max_value,
AVG(CAST(`{COL_NAME}` AS FLOAT)) AS avg_value,
STDDEV_SAMP(CAST(`{COL_NAME}` AS FLOAT)) AS stdev_value,
- MIN(pct_25) as percentile_25,
- MIN(pct_50) as percentile_50,
- MIN(pct_75) as percentile_75,
-08_else: NULL as min_value,
- NULL as min_value_over_0,
- NULL as max_value,
- NULL as avg_value,
- NULL as stdev_value,
- NULL as percentile_25,
- NULL as percentile_50,
- NULL as percentile_75,
-
-10_N_dec: SUM(ROUND(ABS(MOD(`{COL_NAME}`, 1)), 5)) as fractional_sum,
-10_else: NULL as fractional_sum,
-
-11_D: CASE
+ MIN(pct_25) AS percentile_25,
+ MIN(pct_50) AS percentile_50,
+ MIN(pct_75) AS percentile_75,
+-- TG-ELSE
+ NULL AS min_value,
+ NULL AS min_value_over_0,
+ NULL AS max_value,
+ NULL AS avg_value,
+ NULL AS stdev_value,
+ NULL AS percentile_25,
+ NULL AS percentile_50,
+ NULL AS percentile_75,
+-- TG-ENDIF
+-- TG-IF is_N_decimal
+ SUM(ROUND(ABS(MOD(`{COL_NAME}`, 1)), 5)) AS fractional_sum,
+-- TG-ELSE
+ NULL AS fractional_sum,
+-- TG-ENDIF
+-- TG-IF is_type_D
+ CASE
WHEN MIN(`{COL_NAME}`) IS NULL THEN NULL
- ELSE CASE WHEN MIN(`{COL_NAME}`) >= CAST('0001-01-01' as date) THEN MIN(`{COL_NAME}`) ELSE CAST('0001-01-01' as date) END
- END as min_date,
- MAX(`{COL_NAME}`) as max_date,
+ ELSE CASE WHEN MIN(`{COL_NAME}`) >= CAST('0001-01-01' AS date) THEN MIN(`{COL_NAME}`) ELSE CAST('0001-01-01' AS date) END
+ END AS min_date,
+ MAX(`{COL_NAME}`) AS max_date,
SUM(CASE
WHEN <%DATEDIFF_MONTH; `{COL_NAME}`; '{RUN_DATE}'::TIMESTAMP%> > 12 THEN 1
ELSE 0
@@ -223,53 +233,57 @@
WHEN <%DATEDIFF_MONTH; `{COL_NAME}`; '{RUN_DATE}'::TIMESTAMP%> > 240 THEN 1
ELSE 0
END) AS distant_future_date_ct,
- COUNT(DISTINCT <%DATEDIFF_DAY; `{COL_NAME}`; '{RUN_DATE}'::DATE%>) as date_days_present,
- COUNT(DISTINCT <%DATEDIFF_WEEK; `{COL_NAME}`; '{RUN_DATE}'::DATE%>) as date_weeks_present,
- COUNT(DISTINCT <%DATEDIFF_MONTH; `{COL_NAME}`; '{RUN_DATE}'::DATE%>) as date_months_present,
-11_else: NULL as min_date,
- NULL as max_date,
- NULL as before_1yr_date_ct,
- NULL as before_5yr_date_ct,
- NULL as before_20yr_date_ct,
+ COUNT(DISTINCT <%DATEDIFF_DAY; `{COL_NAME}`; '{RUN_DATE}'::DATE%>) AS date_days_present,
+ COUNT(DISTINCT <%DATEDIFF_WEEK; `{COL_NAME}`; '{RUN_DATE}'::DATE%>) AS date_weeks_present,
+ COUNT(DISTINCT <%DATEDIFF_MONTH; `{COL_NAME}`; '{RUN_DATE}'::DATE%>) AS date_months_present,
+-- TG-ELSE
+ NULL AS min_date,
+ NULL AS max_date,
+ NULL AS before_1yr_date_ct,
+ NULL AS before_5yr_date_ct,
+ NULL AS before_20yr_date_ct,
NULL AS before_100yr_date_ct,
- NULL as within_1yr_date_ct,
- NULL as within_1mo_date_ct,
- NULL as future_date_ct,
- NULL as distant_future_date_ct,
- NULL as date_days_present,
- NULL as date_weeks_present,
- NULL as date_months_present,
-
-12_B: SUM(CAST(`{COL_NAME}` AS INTEGER)) AS boolean_true_ct,
-12_else: NULL as boolean_true_ct,
-
-14_A: ( SELECT COUNT(DISTINCT TRANSLATE(`{COL_NAME}`,
+ NULL AS within_1yr_date_ct,
+ NULL AS within_1mo_date_ct,
+ NULL AS future_date_ct,
+ NULL AS distant_future_date_ct,
+ NULL AS date_days_present,
+ NULL AS date_weeks_present,
+ NULL AS date_months_present,
+-- TG-ENDIF
+-- TG-IF is_type_B
+ SUM(CAST(`{COL_NAME}` AS INTEGER)) AS boolean_true_ct,
+-- TG-ELSE
+ NULL AS boolean_true_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT COUNT(DISTINCT TRANSLATE(`{COL_NAME}`,
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
'aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAANNNNNNNNNN'
)
) AS pattern_ct
FROM target_table
- WHERE `{COL_NAME}` > ' ' ) AS distinct_pattern_ct,
+ WHERE `{COL_NAME}` > ' ' ) AS distinct_pattern_ct,
SUM(CAST(SIGN(LEN(TRIM(`{COL_NAME}`)) - LEN(REPLACE(TRIM(`{COL_NAME}`),' ',''))) AS BIGINT)) AS embedded_space_ct,
- AVG(CAST(LEN(TRIM(`{COL_NAME}`)) - LEN(REPLACE(TRIM(`{COL_NAME}`),' ','')) AS FLOAT)) AS avg_embedded_spaces,
-14_else: NULL as distinct_pattern_ct,
- NULL as embedded_space_ct,
- NULL as avg_embedded_spaces,
-
-16_all: " '{PROFILE_RUN_ID}' as profile_run_id"
-
-98_all: ' FROM target_table'
-
-99_N: |
- , (SELECT
+ AVG(CAST(LEN(TRIM(`{COL_NAME}`)) - LEN(REPLACE(TRIM(`{COL_NAME}`),' ','')) AS FLOAT)) AS avg_embedded_spaces,
+-- TG-ELSE
+ NULL AS distinct_pattern_ct,
+ NULL AS embedded_space_ct,
+ NULL AS avg_embedded_spaces,
+-- TG-ENDIF
+ '{PROFILE_RUN_ID}' AS profile_run_id
+ FROM target_table
+-- TG-IF is_N_sampling
+ , (SELECT
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY `{COL_NAME}`) OVER () AS pct_25,
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY `{COL_NAME}`) OVER () AS pct_50,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY `{COL_NAME}`) OVER () AS pct_75
- FROM `{DATA_SCHEMA}`.`{DATA_TABLE}` LIMIT 1) pctile
-99_N_sampling: |
- , (SELECT
+ FROM `{DATA_SCHEMA}`.`{DATA_TABLE}` TABLESAMPLE ({SAMPLE_PERCENT_CALC} PERCENT) LIMIT 1 ) pctile
+-- TG-ENDIF
+-- TG-IF is_N_no_sampling
+ , (SELECT
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY `{COL_NAME}`) OVER () AS pct_25,
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY `{COL_NAME}`) OVER () AS pct_50,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY `{COL_NAME}`) OVER () AS pct_75
- FROM `{DATA_SCHEMA}`.`{DATA_TABLE}` TABLESAMPLE ({SAMPLE_PERCENT_CALC} PERCENT) LIMIT 1 ) pctile
-99_else: ' '
+ FROM `{DATA_SCHEMA}`.`{DATA_TABLE}` LIMIT 1) pctile
+-- TG-ENDIF
diff --git a/testgen/template/flavors/databricks/profiling/templated_functions.yaml b/testgen/template/flavors/databricks/profiling/templated_functions.yaml
index a7706e26..24cd7fd5 100644
--- a/testgen/template/flavors/databricks/profiling/templated_functions.yaml
+++ b/testgen/template/flavors/databricks/profiling/templated_functions.yaml
@@ -18,6 +18,6 @@ IS_DATE: CASE
DATEDIFF_MONTH: (YEAR({$2}) * 12 + MONTH({$2}) - YEAR({$1}) * 12 - MONTH({$1}))
-DATEDIFF_WEEK: CAST(DATEDIFF(DATE_TRUNC('week', {$2} + INTERVAL 1 DAY), DATE_TRUNC('week', {$1} + INTERVAL 1 DAY)) / 7 AS INT)
+DATEDIFF_WEEK: CAST(DATEDIFF(DATE_TRUNC('week', {$2}), DATE_TRUNC('week', {$1})) / 7 AS INT)
DATEDIFF_DAY: EXTRACT(DAY FROM DATE({$2}) - DATE({$1}))
diff --git a/testgen/template/flavors/mssql/profiling/project_profiling_query.yaml b/testgen/template/flavors/mssql/profiling/project_profiling_query.sql
similarity index 56%
rename from testgen/template/flavors/mssql/profiling/project_profiling_query.yaml
rename to testgen/template/flavors/mssql/profiling/project_profiling_query.sql
index 77ec98c8..b1313712 100644
--- a/testgen/template/flavors/mssql/profiling/project_profiling_query.yaml
+++ b/testgen/template/flavors/mssql/profiling/project_profiling_query.sql
@@ -1,18 +1,14 @@
----
-01_sampling: |
- WITH target_table AS (
- SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}" TABLESAMPLE ({SAMPLE_PERCENT_CALC} PERCENT) WITH (NOLOCK)
- )
- SELECT
-01_else: |
- WITH target_table AS (
- SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}" WITH (NOLOCK)
- )
- SELECT
-01_all: |
- {CONNECTION_ID} as connection_id,
- '{PROJECT_CODE}' as project_code,
- '{TABLE_GROUPS_ID}' as table_groups_id,
+WITH target_table AS (
+-- TG-IF do_sample
+ SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}" TABLESAMPLE ({SAMPLE_PERCENT_CALC} PERCENT) WITH (NOLOCK)
+-- TG-ELSE
+ SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}" WITH (NOLOCK)
+-- TG-ENDIF
+)
+SELECT
+ {CONNECTION_ID} AS connection_id,
+ '{PROJECT_CODE}' AS project_code,
+ '{TABLE_GROUPS_ID}' AS table_groups_id,
'{DATA_SCHEMA}' AS schema_name,
'{RUN_DATE}' AS run_date,
'{DATA_TABLE}' AS table_name,
@@ -22,49 +18,61 @@
'{DB_DATA_TYPE}' AS db_data_type,
'{COL_GEN_TYPE}' AS general_type,
COUNT(*) AS record_ct,
-
-02_X: |
+-- TG-IF is_type_X
COUNT(CASE WHEN "{COL_NAME}" IS NOT NULL THEN 1 END) AS value_ct,
NULL AS distinct_value_ct,
- SUM(CASE WHEN "{COL_NAME}" IS NULL THEN 1 ELSE 0 END) AS null_value_ct,
-02_else: |
+-- TG-ELSE
COUNT("{COL_NAME}") AS value_ct,
COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
+-- TG-ENDIF
SUM(CASE WHEN "{COL_NAME}" IS NULL THEN 1 ELSE 0 END) AS null_value_ct,
-
-03_ADN: MIN(LEN("{COL_NAME}")) AS min_length,
- MAX(LEN("{COL_NAME}")) AS max_length,
+-- TG-IF is_type_ADN
+ MIN(LEN("{COL_NAME}")) AS min_length,
+ MAX(LEN("{COL_NAME}")) AS max_length,
AVG(CAST(NULLIF(LEN("{COL_NAME}"), 0) AS FLOAT)) AS avg_length,
-03_else: NULL as min_length,
- NULL as max_length,
- NULL as avg_length,
-
-04_A: SUM(CASE
+-- TG-ELSE
+ NULL AS min_length,
+ NULL AS max_length,
+ NULL AS avg_length,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ SUM(CASE
WHEN LTRIM(RTRIM("{COL_NAME}")) LIKE '0([.]0*)' THEN 1 ELSE 0
- END) AS zero_value_ct,
-04_N: CAST(SUM( 1 - ABS(SIGN("{COL_NAME}")))AS BIGINT ) AS zero_value_ct,
-04_else: NULL as zero_value_ct,
-
-05_A: COUNT(DISTINCT UPPER(REPLACE(TRANSLATE("{COL_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ',''))) as distinct_std_value_ct,
+ END) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ CAST(SUM( 1 - ABS(SIGN("{COL_NAME}")))AS BIGINT ) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_not_A_not_N
+ NULL AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ COUNT(DISTINCT UPPER(REPLACE(TRANSLATE("{COL_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ',''))) AS distinct_std_value_ct,
SUM(CASE
WHEN "{COL_NAME}" = '' THEN 1
- ELSE 0
- END) AS zero_length_ct,
- SUM( CASE
- WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1
- ELSE 0
- END ) AS lead_space_ct,
- SUM( CASE WHEN "{COL_NAME}" LIKE '"%"' OR "{COL_NAME}" LIKE '''%''' THEN 1 ELSE 0 END ) as quoted_value_ct,
- SUM( CASE WHEN "{COL_NAME}" LIKE '%[0-9]%' THEN 1 ELSE 0 END ) as includes_digit_ct,
- SUM( CASE
- WHEN "{COL_NAME}" IN ('.', '?') OR "{COL_NAME}" LIKE ' ' THEN 1
+ ELSE 0
+ END) AS zero_length_ct,
+ SUM(CASE
+ WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1
+ ELSE 0
+ END) AS lead_space_ct,
+ SUM(CASE WHEN "{COL_NAME}" LIKE '"%"' OR "{COL_NAME}" LIKE '''%''' THEN 1 ELSE 0 END ) AS quoted_value_ct,
+ SUM(CASE WHEN "{COL_NAME}" LIKE '%[0-9]%' THEN 1 ELSE 0 END ) AS includes_digit_ct,
+ SUM(CASE
+ WHEN LEN("{COL_NAME}") > 0
+ AND ((LEN(REPLACE("{COL_NAME}", '.', ''))= 0 )
+ OR (LEN(REPLACE("{COL_NAME}", '-', ''))= 0 )
+ OR (LEN(REPLACE("{COL_NAME}", '?', ''))= 0 )
+ ) THEN 1
+ WHEN DATALENGTH("{COL_NAME}") > 0
+ AND LEN(LTRIM(RTRIM("{COL_NAME}")))= 0
+ THEN 1
WHEN LEN("{COL_NAME}") > 1
- AND ( LOWER("{COL_NAME}") LIKE '%..%' OR LOWER("{COL_NAME}") LIKE '%--%'
- OR (LEN(REPLACE("{COL_NAME}", '0', ''))= 0 )
+ AND ((LEN(REPLACE("{COL_NAME}", '0', ''))= 0 )
OR (LEN(REPLACE("{COL_NAME}", '9', ''))= 0 )
OR (LEN(REPLACE(LOWER("{COL_NAME}"), 'x', ''))= 0 )
OR (LEN(REPLACE(LOWER("{COL_NAME}"), 'z', ''))= 0 )
- ) THEN 1
+ ) THEN 1
WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
'n/a','#na','none','null','unknown') THEN 1
WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
@@ -72,40 +80,41 @@
WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
'[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
ELSE 0
- END ) AS filled_value_ct,
- LEFT(MIN(NULLIF("{COL_NAME}", '') COLLATE Latin1_General_BIN ), 100) AS min_text,
- LEFT(MAX(NULLIF("{COL_NAME}", '') COLLATE Latin1_General_BIN ), 100) AS max_text,
+ END) AS filled_value_ct,
+ LEFT(MIN(NULLIF("{COL_NAME}", '') COLLATE Latin1_General_BIN ), 100) AS min_text,
+ LEFT(MAX(NULLIF("{COL_NAME}", '') COLLATE Latin1_General_BIN ), 100) AS max_text,
SUM(CASE
- WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = "{COL_NAME}" THEN 0
- WHEN TRANSLATE("{COL_NAME}", 'abcdefghijklmnopqrstuvwxyz', ' ') = "{COL_NAME}" THEN 1
+ WHEN "{COL_NAME}" COLLATE Latin1_General_BIN = UPPER("{COL_NAME}")
+ AND "{COL_NAME}" COLLATE Latin1_General_BIN <> LOWER("{COL_NAME}") THEN 1
ELSE 0
END) AS upper_case_ct,
SUM(CASE
- WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = "{COL_NAME}" THEN 0
- WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', ' ') = "{COL_NAME}" THEN 1
+ WHEN "{COL_NAME}" COLLATE Latin1_General_BIN = LOWER("{COL_NAME}")
+ AND "{COL_NAME}" COLLATE Latin1_General_BIN <> UPPER("{COL_NAME}") THEN 1
ELSE 0
END) AS lower_case_ct,
SUM(CASE
WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = "{COL_NAME}" THEN 1
ELSE 0
END) AS non_alpha_ct,
- COUNT( CASE WHEN TRANSLATE("{COL_NAME}", NCHAR(160), 'X') <> "{COL_NAME}" THEN 1 END) as non_printing_ct,
- SUM(<%IS_NUM;LEFT("{COL_NAME}", 31)%>) AS numeric_ct,
- SUM(<%IS_DATE;LEFT("{COL_NAME}", 26)%>) AS date_ct,
+ COUNT(CASE WHEN TRANSLATE("{COL_NAME}", NCHAR(160), 'X') <> "{COL_NAME}" THEN 1 END) AS non_printing_ct,
+ SUM(<%IS_NUM;LEFT("{COL_NAME}", 31)%>) AS numeric_ct,
+ SUM(<%IS_DATE;LEFT("{COL_NAME}", 26)%>) AS date_ct,
CASE
- WHEN CAST(SUM( CASE WHEN UPPER("{COL_NAME}") LIKE '[1-9]% [A-Z]% %'
+ WHEN CAST(SUM(CASE WHEN UPPER("{COL_NAME}") LIKE '[1-9]% [A-Z]% %'
AND CHARINDEX(' ', "{COL_NAME}") BETWEEN 2 and 6 THEN 1
- END ) as FLOAT) /CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'STREET_ADDR'
+ END) AS FLOAT) /CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'STREET_ADDR'
WHEN CAST(SUM(CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'STATE_USA'
- WHEN CAST(SUM( CASE WHEN ("{COL_NAME}" LIKE '[+]1%[0-9][0-9][0-9]%[-. ][0-9][0-9][0-9]%[0-9][0-9][0-9][0-9,0-9,0-9,0-9,0-9,0-9]' AND "{COL_NAME}" NOT LIKE '%[^0-9+()-]%')
+ WHEN CAST(SUM(CASE WHEN ("{COL_NAME}" LIKE '[+]1%[0-9][0-9][0-9]%[-. ][0-9][0-9][0-9]%[0-9][0-9][0-9][0-9,0-9,0-9,0-9,0-9,0-9]' AND "{COL_NAME}" NOT LIKE '%[^0-9+()-]%')
OR ("{COL_NAME}" LIKE '[+]1%[0-9][0-9][0-9][-. ][0-9][0-9][0-9][-. ][0-9][0-9][0-9][0-9]' AND "{COL_NAME}" NOT LIKE '%[^0-9+-]%')
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'PHONE_USA'
- WHEN CAST(SUM( CASE WHEN "{COL_NAME}" LIKE '%[_a-zA-Z0-9.-]%@%[a-zA-Z0-9.-]%.[a-zA-Z][a-zA-Z]%'
+ WHEN CAST(SUM(CASE WHEN "{COL_NAME}" LIKE '%[_a-zA-Z0-9.-]%@%[a-zA-Z0-9.-]%.[a-zA-Z][a-zA-Z]%'
+ AND "{COL_NAME}" NOT LIKE '%://%'
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'EMAIL'
- WHEN CAST(SUM( CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
+ WHEN CAST(SUM(CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'ZIP_USA'
- WHEN CAST(SUM( CASE WHEN "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS NOT LIKE ' %'
+ WHEN CAST(SUM(CASE WHEN "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS NOT LIKE ' %'
AND "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '[a-z0-9 _-]%'
AND ("{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.txt'
OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.csv'
@@ -115,9 +124,10 @@
OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.pdf'
OR "{COL_NAME}" COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%.xlsx')
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'FILE_NAME'
- WHEN CAST(SUM( CASE WHEN "{COL_NAME}" LIKE '[0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9]'
+ WHEN CAST(SUM(CASE WHEN "{COL_NAME}" LIKE '[0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9][- ][0-9][0-9][0-9][0-9]'
+ OR "{COL_NAME}" LIKE '[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]'
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'CREDIT_CARD'
- WHEN CAST(SUM( CASE WHEN ( "{COL_NAME}" LIKE '%,%,%,%'
+ WHEN CAST(SUM(CASE WHEN ( "{COL_NAME}" LIKE '%,%,%,%'
OR "{COL_NAME}" LIKE '%|%|%|%'
OR "{COL_NAME}" LIKE '%^%^%^%'
OR "{COL_NAME}" LIKE '%' + CHAR(9) + '%' + CHAR(9) + '%' + CHAR(9) + '%' )
@@ -125,37 +135,39 @@
OR "{COL_NAME}" LIKE '% but %'
OR "{COL_NAME}" LIKE '% or %'
OR "{COL_NAME}" LIKE '% yet %' )
- AND ISNULL(CAST(LEN("{COL_NAME}") - LEN(REPLACE("{COL_NAME}", ',', '')) as FLOAT)
- / CAST(NULLIF(LEN("{COL_NAME}") - LEN(REPLACE("{COL_NAME}", ' ', '')), 0) as FLOAT), 1) > 0.6
+ AND ISNULL(CAST(LEN("{COL_NAME}") - LEN(REPLACE("{COL_NAME}", ',', '')) AS FLOAT)
+ / CAST(NULLIF(LEN("{COL_NAME}") - LEN(REPLACE("{COL_NAME}", ' ', '')), 0) AS FLOAT), 1) > 0.6
THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'DELIMITED_DATA'
- WHEN CAST(SUM ( CASE WHEN "{COL_NAME}" LIKE '[0-8][0-9][0-9][- ][0-9][0-9][- ][0-9][0-9][0-9][0-9]'
+ WHEN CAST(SUM (CASE WHEN "{COL_NAME}" LIKE '[0-8][0-9][0-9][- ][0-9][0-9][- ][0-9][0-9][0-9][0-9]'
AND LEFT("{COL_NAME}", 3) NOT BETWEEN '734' AND '749'
AND LEFT("{COL_NAME}", 3) <> '666' THEN 1 END) AS FLOAT)/CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'SSN'
- END as std_pattern_match,
-05_else: NULL as distinct_std_value_ct,
- NULL as zero_length_ct,
- NULL as lead_space_ct,
- NULL as quoted_value_ct,
- NULL as includes_digit_ct,
- NULL as filled_value_ct,
- NULL as min_text,
- NULL as max_text,
- NULL as upper_case_ct,
- NULL as lower_case_ct,
- NULL as non_alpha_ct,
- NULL as non_printing_ct,
- NULL as numeric_ct,
- NULL as date_ct,
- NULL as std_pattern_match,
-
-06_A: ( SELECT LEFT(STRING_AGG(pattern, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1000) AS concat_pats
+ END AS std_pattern_match,
+-- TG-ELSE
+ NULL AS distinct_std_value_ct,
+ NULL AS zero_length_ct,
+ NULL AS lead_space_ct,
+ NULL AS quoted_value_ct,
+ NULL AS includes_digit_ct,
+ NULL AS filled_value_ct,
+ NULL AS min_text,
+ NULL AS max_text,
+ NULL AS upper_case_ct,
+ NULL AS lower_case_ct,
+ NULL AS non_alpha_ct,
+ NULL AS non_printing_ct,
+ NULL AS numeric_ct,
+ NULL AS date_ct,
+ NULL AS std_pattern_match,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT LEFT(STRING_AGG(pattern, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1000) AS concat_pats
FROM (
SELECT TOP 5 CAST(COUNT(*) AS VARCHAR(40)) + ' | ' + pattern AS pattern,
COUNT(*) AS ct
- FROM ( SELECT TRANSLATE("{COL_NAME}" COLLATE Latin1_General_BIN,
+ FROM (SELECT TRANSLATE("{COL_NAME}" COLLATE Latin1_General_BIN,
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
'aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAANNNNNNNNNN' )
- AS pattern
+ AS pattern
FROM target_table
WHERE "{COL_NAME}" > ' ' AND ((SELECT MAX(LEN("{COL_NAME}"))
FROM target_table) BETWEEN 3 and {MAX_PATTERN_LENGTH})) p
@@ -163,33 +175,39 @@
HAVING pattern > ' '
ORDER BY COUNT(*) DESC
) ps) AS top_patterns,
-06_else: NULL as top_patterns,
-
-08_N: MIN("{COL_NAME}") AS min_value,
- MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
- MAX("{COL_NAME}") AS max_value,
- AVG(CAST("{COL_NAME}" AS FLOAT)) AS avg_value,
- STDEV(CAST("{COL_NAME}" AS FLOAT)) AS stdev_value,
- MIN(pct_25) as percentile_25,
- MIN(pct_50) as percentile_50,
- MIN(pct_75) as percentile_75,
-08_else: NULL as min_value,
- NULL as min_value_over_0,
- NULL as max_value,
- NULL as avg_value,
- NULL as stdev_value,
- NULL as percentile_25,
- NULL as percentile_50,
- NULL as percentile_75,
-
-10_N_dec: SUM(ROUND(ABS(("{COL_NAME}" % 1)), 5)) as fractional_sum,
-10_else: NULL as fractional_sum,
-
-11_D: CASE
+-- TG-ELSE
+ NULL AS top_patterns,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ MIN("{COL_NAME}") AS min_value,
+ MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
+ MAX("{COL_NAME}") AS max_value,
+ AVG(CAST("{COL_NAME}" AS FLOAT)) AS avg_value,
+ STDEV(CAST("{COL_NAME}" AS FLOAT)) AS stdev_value,
+ MIN(pct_25) AS percentile_25,
+ MIN(pct_50) AS percentile_50,
+ MIN(pct_75) AS percentile_75,
+-- TG-ELSE
+ NULL AS min_value,
+ NULL AS min_value_over_0,
+ NULL AS max_value,
+ NULL AS avg_value,
+ NULL AS stdev_value,
+ NULL AS percentile_25,
+ NULL AS percentile_50,
+ NULL AS percentile_75,
+-- TG-ENDIF
+-- TG-IF is_N_decimal
+ SUM(ROUND(ABS(("{COL_NAME}" % 1)), 5)) AS fractional_sum,
+-- TG-ELSE
+ NULL AS fractional_sum,
+-- TG-ENDIF
+-- TG-IF is_type_D
+ CASE
WHEN MIN("{COL_NAME}") IS NULL THEN NULL
- ELSE CASE WHEN MIN("{COL_NAME}") >= CAST('0001-01-01' as date) THEN MIN("{COL_NAME}") ELSE CAST('0001-01-01' as date) END
- END as min_date,
- MAX("{COL_NAME}") as max_date,
+ ELSE CASE WHEN MIN("{COL_NAME}") >= CAST('0001-01-01' AS date) THEN MIN("{COL_NAME}") ELSE CAST('0001-01-01' AS date) END
+ END AS min_date,
+ MAX("{COL_NAME}") AS max_date,
SUM(CASE
WHEN DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}') > 12 THEN 1
ELSE 0
@@ -221,53 +239,57 @@
WHEN DATEDIFF(month, '{RUN_DATE}', "{COL_NAME}") > 240 THEN 1
ELSE 0
END) AS distant_future_date_ct,
- COUNT(DISTINCT DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}' ) ) as date_days_present,
- COUNT(DISTINCT DATEDIFF(week, "{COL_NAME}", '{RUN_DATE}' ) ) as date_weeks_present,
- COUNT(DISTINCT DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}' ) ) as date_months_present,
-11_else: NULL as min_date,
- NULL as max_date,
- NULL as before_1yr_date_ct,
- NULL as before_5yr_date_ct,
- NULL as before_20yr_date_ct,
+ COUNT(DISTINCT DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}' ) ) AS date_days_present,
+ COUNT(DISTINCT DATEDIFF(week, DATEADD(day, -1, "{COL_NAME}"), DATEADD(day, -1, CAST('{RUN_DATE}' AS DATE)) ) ) AS date_weeks_present,
+ COUNT(DISTINCT DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}' ) ) AS date_months_present,
+-- TG-ELSE
+ NULL AS min_date,
+ NULL AS max_date,
+ NULL AS before_1yr_date_ct,
+ NULL AS before_5yr_date_ct,
+ NULL AS before_20yr_date_ct,
NULL AS before_100yr_date_ct,
- NULL as within_1yr_date_ct,
- NULL as within_1mo_date_ct,
- NULL as future_date_ct,
- NULL as distant_future_date_ct,
- NULL as date_days_present,
- NULL as date_weeks_present,
- NULL as date_months_present,
-
-12_B: SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
-12_else: NULL as boolean_true_ct,
-
-14_A: ( SELECT COUNT(DISTINCT TRANSLATE("{COL_NAME}" COLLATE Latin1_General_BIN,
+ NULL AS within_1yr_date_ct,
+ NULL AS within_1mo_date_ct,
+ NULL AS future_date_ct,
+ NULL AS distant_future_date_ct,
+ NULL AS date_days_present,
+ NULL AS date_weeks_present,
+ NULL AS date_months_present,
+-- TG-ENDIF
+-- TG-IF is_type_B
+ SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
+-- TG-ELSE
+ NULL AS boolean_true_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT COUNT(DISTINCT TRANSLATE("{COL_NAME}" COLLATE Latin1_General_BIN,
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
'aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAANNNNNNNNNN'
)
) AS pattern_ct
FROM target_table
- WHERE "{COL_NAME}" > ' ' ) AS distinct_pattern_ct,
+ WHERE "{COL_NAME}" > ' ' ) AS distinct_pattern_ct,
SUM(CAST(SIGN(LEN(RTRIM(LTRIM("{COL_NAME}"))) - LEN(REPLACE(RTRIM(LTRIM("{COL_NAME}")),' ',''))) AS BIGINT)) AS embedded_space_ct,
- AVG(CAST(LEN(RTRIM(LTRIM("{COL_NAME}"))) - LEN(REPLACE(RTRIM(LTRIM("{COL_NAME}")),' ','')) AS FLOAT)) AS avg_embedded_spaces,
-14_else: NULL as distinct_pattern_ct,
- NULL as embedded_space_ct,
- NULL as avg_embedded_spaces,
-
-16_all: " '{PROFILE_RUN_ID}' as profile_run_id"
-
-98_all: ' FROM target_table '
-
-99_N: |
- , (SELECT TOP 1
+ AVG(CAST(LEN(RTRIM(LTRIM("{COL_NAME}"))) - LEN(REPLACE(RTRIM(LTRIM("{COL_NAME}")),' ','')) AS FLOAT)) AS avg_embedded_spaces,
+-- TG-ELSE
+ NULL AS distinct_pattern_ct,
+ NULL AS embedded_space_ct,
+ NULL AS avg_embedded_spaces,
+-- TG-ENDIF
+ '{PROFILE_RUN_ID}' AS profile_run_id
+ FROM target_table
+-- TG-IF is_N_sampling
+ , (SELECT TOP 1
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
- FROM "{DATA_SCHEMA}"."{DATA_TABLE}" WITH (NOLOCK)) pctile
-99_N_sampling: |
- , (SELECT TOP 1
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" TABLESAMPLE ({SAMPLE_PERCENT_CALC} PERCENT) WITH (NOLOCK)) pctile
+-- TG-ENDIF
+-- TG-IF is_N_no_sampling
+ , (SELECT TOP 1
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
- FROM "{DATA_SCHEMA}"."{DATA_TABLE}" TABLESAMPLE ({SAMPLE_PERCENT_CALC} PERCENT) WITH (NOLOCK)) pctile
-99_else: ' '
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" WITH (NOLOCK)) pctile
+-- TG-ENDIF
diff --git a/testgen/template/flavors/oracle/data_chars/get_schema_ddf.sql b/testgen/template/flavors/oracle/data_chars/get_schema_ddf.sql
new file mode 100644
index 00000000..d4f4f578
--- /dev/null
+++ b/testgen/template/flavors/oracle/data_chars/get_schema_ddf.sql
@@ -0,0 +1,40 @@
+SELECT
+ c.owner AS schema_name,
+ c.table_name,
+ c.column_name,
+ CASE
+ WHEN c.data_type IN ('VARCHAR2', 'NVARCHAR2', 'CHAR', 'NCHAR') THEN 'char(' || c.data_length || ')'
+ WHEN c.data_type = 'NUMBER' AND c.data_precision IS NOT NULL AND c.data_scale = 0 THEN 'bigint'
+ WHEN c.data_type = 'NUMBER' AND c.data_precision IS NOT NULL THEN 'numeric(' || c.data_precision || ',' || c.data_scale || ')'
+ WHEN c.data_type = 'NUMBER' THEN 'int'
+ WHEN c.data_type IN ('FLOAT', 'BINARY_FLOAT', 'BINARY_DOUBLE') THEN 'numeric'
+ WHEN c.data_type LIKE 'TIMESTAMP%' THEN 'timestamp'
+ ELSE LOWER(c.data_type)
+ END AS column_type,
+ CASE
+ WHEN c.data_type IN ('VARCHAR2', 'NVARCHAR2', 'CHAR', 'NCHAR') THEN c.data_type || '(' || c.data_length || ')'
+ WHEN c.data_type = 'NUMBER' AND c.data_precision IS NOT NULL THEN 'NUMBER(' || c.data_precision || ',' || c.data_scale || ')'
+ WHEN c.data_type = 'FLOAT' THEN 'FLOAT(' || c.data_precision || ')'
+ ELSE c.data_type
+ END AS db_data_type,
+ c.column_id AS ordinal_position,
+ CASE
+ WHEN c.data_type IN ('VARCHAR2', 'NVARCHAR2', 'CHAR', 'NCHAR')
+ THEN 'A'
+ WHEN c.data_type = 'BOOLEAN'
+ THEN 'B'
+ WHEN c.data_type = 'DATE' OR c.data_type LIKE 'TIMESTAMP%'
+ THEN 'D'
+ WHEN c.data_type IN ('NUMBER', 'FLOAT', 'BINARY_FLOAT', 'BINARY_DOUBLE')
+ THEN 'N'
+ ELSE 'X'
+ END AS general_type,
+ CASE
+ WHEN c.data_type = 'NUMBER' AND c.data_scale > 0 THEN 1
+ ELSE 0
+ END AS is_decimal,
+ t.num_rows AS approx_record_ct
+FROM all_tab_columns c
+LEFT JOIN all_tables t ON c.owner = t.owner AND c.table_name = t.table_name
+WHERE c.owner = '{DATA_SCHEMA}' {TABLE_CRITERIA}
+ORDER BY c.owner, c.table_name, c.column_id
diff --git a/testgen/template/flavors/oracle/gen_query_tests/gen_Dupe_Rows.sql b/testgen/template/flavors/oracle/gen_query_tests/gen_Dupe_Rows.sql
new file mode 100644
index 00000000..1aa96960
--- /dev/null
+++ b/testgen/template/flavors/oracle/gen_query_tests/gen_Dupe_Rows.sql
@@ -0,0 +1,55 @@
+WITH latest_run AS (
+ -- Latest complete profiling run before as-of-date
+ SELECT MAX(run_date) AS last_run_date
+ FROM profile_results
+ WHERE table_groups_id = :TABLE_GROUPS_ID ::UUID
+ AND run_date::DATE <= :AS_OF_DATE ::DATE
+),
+selected_tables AS (
+ SELECT profile_run_id, schema_name, table_name,
+ STRING_AGG(:QUOTE || column_name || :QUOTE, ', ' ORDER BY position) AS groupby_names
+ FROM profile_results p
+ INNER JOIN latest_run lr ON p.run_date = lr.last_run_date
+ WHERE table_groups_id = :TABLE_GROUPS_ID ::UUID
+ -- Skip X types - Oracle does not allow grouping by types like BLOB, RAW, BFILE, CLOB, NCLOB, LONG
+ AND general_type <> 'X'
+ GROUP BY profile_run_id, schema_name, table_name
+)
+INSERT INTO test_definitions (
+ table_groups_id, test_suite_id, test_type,
+ schema_name, table_name,
+ test_active, last_auto_gen_date, profiling_as_of_date, profile_run_id,
+ groupby_names, skip_errors
+)
+SELECT
+ :TABLE_GROUPS_ID ::UUID AS table_groups_id,
+ :TEST_SUITE_ID ::UUID AS test_suite_id,
+ 'Dupe_Rows' AS test_type,
+ s.schema_name,
+ s.table_name,
+ 'Y' AS test_active,
+ :RUN_DATE ::TIMESTAMP AS last_auto_gen_date,
+ :AS_OF_DATE ::TIMESTAMP AS profiling_as_of_date,
+ s.profile_run_id,
+ s.groupby_names,
+ 0 AS skip_errors
+FROM selected_tables s
+ -- Only insert if test type is active
+WHERE EXISTS (SELECT 1 FROM test_types WHERE test_type = 'Dupe_Rows' AND active = 'Y')
+ -- Only insert if test type is included in generation set
+ AND EXISTS (SELECT 1 FROM generation_sets WHERE test_type = 'Dupe_Rows' AND generation_set = :GENERATION_SET)
+
+-- Match "uix_td_autogen_table" unique index exactly
+ON CONFLICT (test_suite_id, test_type, schema_name, table_name)
+WHERE last_auto_gen_date IS NOT NULL
+ AND table_name IS NOT NULL
+ AND column_name IS NULL
+
+-- Update tests if they already exist
+DO UPDATE SET
+ test_active = EXCLUDED.test_active,
+ last_auto_gen_date = EXCLUDED.last_auto_gen_date,
+ groupby_names = EXCLUDED.groupby_names,
+ skip_errors = EXCLUDED.skip_errors
+-- Ignore locked tests
+WHERE test_definitions.lock_refresh = 'N';
diff --git a/testgen/template/flavors/oracle/gen_query_tests/gen_Freshness_Trend.sql b/testgen/template/flavors/oracle/gen_query_tests/gen_Freshness_Trend.sql
new file mode 100644
index 00000000..d22e79d6
--- /dev/null
+++ b/testgen/template/flavors/oracle/gen_query_tests/gen_Freshness_Trend.sql
@@ -0,0 +1,193 @@
+WITH latest_run AS (
+ -- Latest complete profiling run before as-of-date
+ SELECT MAX(run_date) AS last_run_date
+ FROM profile_results
+ WHERE table_groups_id = :TABLE_GROUPS_ID ::UUID
+ AND run_date::DATE <= :AS_OF_DATE ::DATE
+),
+latest_results AS (
+ -- Column results for latest run
+ SELECT p.profile_run_id, p.schema_name, p.table_name, p.column_name,
+ p.functional_data_type, p.general_type,
+ p.distinct_value_ct, p.record_ct, p.null_value_ct,
+ p.max_value, p.min_value, p.avg_value, p.stdev_value
+ FROM profile_results p
+ INNER JOIN latest_run lr ON p.run_date = lr.last_run_date
+ INNER JOIN data_table_chars dtc ON (
+ dtc.table_groups_id = p.table_groups_id
+ AND dtc.schema_name = p.schema_name
+ AND dtc.table_name = p.table_name
+ -- Ignore dropped tables
+ AND dtc.drop_date IS NULL
+ )
+ WHERE p.table_groups_id = :TABLE_GROUPS_ID ::UUID
+),
+-- IDs - TOP 2
+id_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type, distinct_value_ct,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY
+ CASE
+ WHEN functional_data_type ILIKE 'ID-Unique%' THEN 1
+ WHEN functional_data_type = 'ID-Secondary' THEN 2
+ ELSE 3
+ END, distinct_value_ct DESC, column_name
+ ) AS rank
+ FROM latest_results
+ WHERE general_type IN ('A', 'D', 'N')
+ AND functional_data_type ILIKE 'ID%'
+),
+-- Process Date - TOP 1
+process_date_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type, distinct_value_ct,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY
+ CASE
+ WHEN column_name ILIKE '%mod%' THEN 1
+ WHEN column_name ILIKE '%up%' THEN 1
+ WHEN column_name ILIKE '%cr%' THEN 2
+ WHEN column_name ILIKE '%in%' THEN 2
+ END, distinct_value_ct DESC, column_name
+ ) AS rank
+ FROM latest_results
+ WHERE general_type IN ('A', 'D', 'N')
+ AND functional_data_type ILIKE 'process%'
+),
+-- Transaction Date - TOP 1
+tran_date_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type, distinct_value_ct,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY distinct_value_ct DESC, column_name
+ ) AS rank
+ FROM latest_results
+ WHERE general_type IN ('A', 'D', 'N')
+ AND functional_data_type ILIKE 'transactional date%'
+ OR functional_data_type ILIKE 'period%'
+ OR functional_data_type = 'timestamp'
+),
+-- Numeric Measures
+numeric_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type,
+ -- Weighted score
+ (
+ 0.25 * (distinct_value_ct * 1.0 / NULLIF(record_ct, 0)) +
+ 0.15 * ((max_value - min_value) / NULLIF(ABS(NULLIF(avg_value, 0)), 1)) +
+ 0.10 * (LEAST(1, LOG(GREATEST(distinct_value_ct, 2))) / LOG(GREATEST(record_ct, 2))) +
+ 0.40 * (stdev_value / NULLIF(ABS(NULLIF(avg_value, 0)), 1)) +
+ 0.10 * (1.0 - (null_value_ct * 1.0 / NULLIF(NULLIF(record_ct, 0), 1)))
+ ) AS change_detection_score
+ FROM latest_results
+ WHERE general_type = 'N'
+ AND (
+ functional_data_type ILIKE 'Measure%'
+ OR functional_data_type IN ('Sequence', 'Constant')
+ )
+),
+numeric_cols_ranked AS (
+ SELECT *,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY change_detection_score DESC, column_name
+ ) AS rank
+ FROM numeric_cols
+ WHERE change_detection_score IS NOT NULL
+),
+combined AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'ID' AS element_type, general_type, 10 + rank AS fingerprint_order
+ FROM id_cols
+ WHERE rank <= 2
+ UNION ALL
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'DATE_P' AS element_type, general_type, 20 + rank AS fingerprint_order
+ FROM process_date_cols
+ WHERE rank = 1
+ UNION ALL
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'DATE_T' AS element_type, general_type, 30 + rank AS fingerprint_order
+ FROM tran_date_cols
+ WHERE rank = 1
+ UNION ALL
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'MEAS' AS element_type, general_type, 40 + rank AS fingerprint_order
+ FROM numeric_cols_ranked
+ WHERE rank = 1
+),
+selected_tables AS (
+ SELECT profile_run_id, schema_name, table_name,
+ STRING_AGG(column_name, ',' ORDER BY element_type, fingerprint_order, column_name) AS column_names,
+ 'TO_CHAR(COUNT(*)) || ''|'' || ' ||
+ STRING_AGG(
+ REPLACE(
+ CASE
+ WHEN general_type = 'D' THEN 'TO_CHAR(MIN(@@@)) || ''|'' || TO_CHAR(MAX(@@@)) || ''|'' || TO_CHAR(COUNT(DISTINCT @@@))'
+ WHEN general_type = 'A' THEN 'TO_CHAR(MIN(@@@)) || ''|'' || TO_CHAR(MAX(@@@)) || ''|'' || TO_CHAR(COUNT(DISTINCT @@@)) || ''|'' || TO_CHAR(SUM(LENGTH(@@@)))'
+ WHEN general_type = 'N' THEN 'TO_CHAR(COUNT(@@@)) || ''|'' ||
+ TO_CHAR(COUNT(DISTINCT MOD(CAST(CAST(COALESCE(@@@,0) AS NUMBER(38,6)) * 1000000 AS NUMBER(38,0)), 1000003))) || ''|'' ||
+ COALESCE(TO_CHAR(CAST(MIN(@@@) AS NUMBER(38,6))), '''') || ''|'' ||
+ COALESCE(TO_CHAR(CAST(MAX(@@@) AS NUMBER(38,6))), '''') || ''|'' ||
+ COALESCE(TO_CHAR(MOD(COALESCE(SUM(MOD(CAST(ABS(COALESCE(@@@,0)) * 1000000 AS NUMBER), 1000000007)), 0), 1000000007)), '''') || ''|'' ||
+ COALESCE(TO_CHAR(MOD(COALESCE(SUM(MOD(CAST(ABS(COALESCE(@@@,0)) * 1000000 AS NUMBER), 1000000009)), 0), 1000000009)), '''')'
+ END,
+ '@@@', '"' || column_name || '"'
+ ),
+ ' || ''|'' || '
+ ORDER BY element_type, fingerprint_order, column_name
+ ) AS fingerprint
+ FROM combined
+ GROUP BY profile_run_id, schema_name, table_name
+)
+-- Insert tests for selected tables
+INSERT INTO test_definitions (
+ table_groups_id, test_suite_id, test_type,
+ schema_name, table_name, groupby_names,
+ test_active, last_auto_gen_date, profiling_as_of_date, profile_run_id,
+ history_calculation, history_lookback, custom_query
+)
+SELECT
+ :TABLE_GROUPS_ID ::UUID AS table_groups_id,
+ :TEST_SUITE_ID ::UUID AS test_suite_id,
+ 'Freshness_Trend' AS test_type,
+ s.schema_name,
+ s.table_name,
+ s.column_names AS groupby_names,
+ 'Y' AS test_active,
+ :RUN_DATE ::TIMESTAMP AS last_auto_gen_date,
+ :AS_OF_DATE ::TIMESTAMP AS profiling_as_of_date,
+ s.profile_run_id,
+ 'PREDICT' AS history_calculation,
+ NULL AS history_lookback,
+ s.fingerprint AS custom_query
+FROM selected_tables s
+ -- Only insert if test type is active
+WHERE EXISTS (SELECT 1 FROM test_types WHERE test_type = 'Freshness_Trend' AND active = 'Y')
+ -- Only insert if test type is included in generation set
+ AND EXISTS (SELECT 1 FROM generation_sets WHERE test_type = 'Freshness_Trend' AND generation_set = :GENERATION_SET)
+
+-- Match "uix_td_autogen_table" unique index exactly
+ON CONFLICT (test_suite_id, test_type, schema_name, table_name)
+WHERE last_auto_gen_date IS NOT NULL
+ AND table_name IS NOT NULL
+ AND column_name IS NULL
+
+-- Update tests if they already exist
+DO UPDATE SET
+ groupby_names = EXCLUDED.groupby_names,
+ test_active = EXCLUDED.test_active,
+ last_auto_gen_date = EXCLUDED.last_auto_gen_date,
+ profiling_as_of_date = EXCLUDED.profiling_as_of_date,
+ profile_run_id = EXCLUDED.profile_run_id,
+ history_calculation = EXCLUDED.history_calculation,
+ history_lookback = EXCLUDED.history_lookback,
+ custom_query = EXCLUDED.custom_query
+-- Ignore locked tests
+WHERE test_definitions.lock_refresh = 'N'
+ -- Don't update existing tests in "insert" mode
+ AND NOT COALESCE(:INSERT_ONLY, FALSE);
diff --git a/testgen/template/flavors/oracle/gen_query_tests/gen_Table_Freshness.sql b/testgen/template/flavors/oracle/gen_query_tests/gen_Table_Freshness.sql
new file mode 100644
index 00000000..29690379
--- /dev/null
+++ b/testgen/template/flavors/oracle/gen_query_tests/gen_Table_Freshness.sql
@@ -0,0 +1,181 @@
+WITH latest_run AS (
+ -- Latest complete profiling run before as-of-date
+ SELECT MAX(run_date) AS last_run_date
+ FROM profile_results
+ WHERE table_groups_id = :TABLE_GROUPS_ID ::UUID
+ AND run_date::DATE <= :AS_OF_DATE ::DATE
+),
+latest_results AS (
+ -- Column results for latest run
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type,
+ distinct_value_ct, record_ct, null_value_ct,
+ max_value, min_value, avg_value, stdev_value
+ FROM profile_results p
+ INNER JOIN latest_run lr ON p.run_date = lr.last_run_date
+ WHERE table_groups_id = :TABLE_GROUPS_ID ::UUID
+),
+-- IDs - TOP 2
+id_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type, distinct_value_ct,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY
+ CASE
+ WHEN functional_data_type ILIKE 'ID-Unique%' THEN 1
+ WHEN functional_data_type = 'ID-Secondary' THEN 2
+ ELSE 3
+ END, distinct_value_ct DESC, column_name
+ ) AS rank
+ FROM latest_results
+ WHERE general_type IN ('A', 'D', 'N')
+ AND functional_data_type ILIKE 'ID%'
+),
+-- Process Date - TOP 1
+process_date_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type, distinct_value_ct,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY
+ CASE
+ WHEN column_name ILIKE '%mod%' THEN 1
+ WHEN column_name ILIKE '%up%' THEN 1
+ WHEN column_name ILIKE '%cr%' THEN 2
+ WHEN column_name ILIKE '%in%' THEN 2
+ END, distinct_value_ct DESC, column_name
+ ) AS rank
+ FROM latest_results
+ WHERE general_type IN ('A', 'D', 'N')
+ AND functional_data_type ILIKE 'process%'
+),
+-- Transaction Date - TOP 1
+tran_date_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type, distinct_value_ct,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY distinct_value_ct DESC, column_name
+ ) AS rank
+ FROM latest_results
+    WHERE general_type IN ('A', 'D', 'N')
+      AND (functional_data_type ILIKE 'transactional date%'
+           OR functional_data_type ILIKE 'period%'
+           OR functional_data_type = 'timestamp')
+),
+-- Numeric Measures
+numeric_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type,
+ -- Weighted score
+ (
+ 0.25 * (distinct_value_ct * 1.0 / NULLIF(record_ct, 0)) +
+ 0.15 * ((max_value - min_value) / COALESCE(ABS(NULLIF(avg_value, 0)), 1)) +
+ 0.10 * (LEAST(1, LOG(GREATEST(distinct_value_ct, 2))) / LOG(GREATEST(record_ct, 2))) +
+ 0.40 * (stdev_value / COALESCE(ABS(NULLIF(avg_value, 0)), 1)) +
+ 0.10 * (1.0 - (null_value_ct * 1.0 / NULLIF(NULLIF(record_ct, 0), 1)))
+ ) AS change_detection_score
+ FROM latest_results
+ WHERE general_type = 'N'
+ AND (
+ functional_data_type ILIKE 'Measure%'
+ OR functional_data_type IN ('Sequence', 'Constant')
+ )
+),
+numeric_cols_ranked AS (
+ SELECT *,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY change_detection_score DESC, column_name
+ ) AS rank
+ FROM numeric_cols
+ WHERE change_detection_score IS NOT NULL
+),
+combined AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'ID' AS element_type, general_type, 10 + rank AS fingerprint_order
+ FROM id_cols
+ WHERE rank <= 2
+ UNION ALL
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'DATE_P' AS element_type, general_type, 20 + rank AS fingerprint_order
+ FROM process_date_cols
+ WHERE rank = 1
+ UNION ALL
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'DATE_T' AS element_type, general_type, 30 + rank AS fingerprint_order
+ FROM tran_date_cols
+ WHERE rank = 1
+ UNION ALL
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'MEAS' AS element_type, general_type, 40 + rank AS fingerprint_order
+ FROM numeric_cols_ranked
+ WHERE rank = 1
+),
+selected_tables AS (
+ SELECT profile_run_id, schema_name, table_name,
+ 'TO_CHAR(COUNT(*)) || ''|'' || ' ||
+ STRING_AGG(
+ REPLACE(
+ CASE
+ WHEN general_type = 'D' THEN 'TO_CHAR(MIN(@@@)) || ''|'' || TO_CHAR(MAX(@@@)) || ''|'' || TO_CHAR(COUNT(DISTINCT @@@))'
+ WHEN general_type = 'A' THEN 'TO_CHAR(MIN(@@@)) || ''|'' || TO_CHAR(MAX(@@@)) || ''|'' || TO_CHAR(COUNT(DISTINCT @@@)) || ''|'' || TO_CHAR(SUM(LENGTH(@@@)))'
+ WHEN general_type = 'N' THEN 'TO_CHAR(COUNT(@@@)) || ''|'' ||
+ TO_CHAR(COUNT(DISTINCT MOD(CAST(CAST(COALESCE(@@@,0) AS NUMBER(38,6)) * 1000000 AS NUMBER(38,0)), 1000003))) || ''|'' ||
+ COALESCE(TO_CHAR(CAST(MIN(@@@) AS NUMBER(38,6))), '''') || ''|'' ||
+ COALESCE(TO_CHAR(CAST(MAX(@@@) AS NUMBER(38,6))), '''') || ''|'' ||
+ COALESCE(TO_CHAR(MOD(COALESCE(SUM(MOD(CAST(ABS(COALESCE(@@@,0)) * 1000000 AS NUMBER), 1000000007)), 0), 1000000007)), '''') || ''|'' ||
+ COALESCE(TO_CHAR(MOD(COALESCE(SUM(MOD(CAST(ABS(COALESCE(@@@,0)) * 1000000 AS NUMBER), 1000000009)), 0), 1000000009)), '''')'
+ END,
+ '@@@', '"' || column_name || '"'
+ ),
+ ' || ''|'' || '
+ ORDER BY element_type, fingerprint_order, column_name
+ ) AS fingerprint
+ FROM combined
+ GROUP BY profile_run_id, schema_name, table_name
+)
+-- Insert tests for selected tables
+INSERT INTO test_definitions (
+ table_groups_id, test_suite_id, test_type,
+ schema_name, table_name,
+ test_active, last_auto_gen_date, profiling_as_of_date, profile_run_id,
+ history_calculation, history_lookback, custom_query
+)
+SELECT
+ :TABLE_GROUPS_ID ::UUID AS table_groups_id,
+ :TEST_SUITE_ID ::UUID AS test_suite_id,
+ 'Table_Freshness' AS test_type,
+ s.schema_name,
+ s.table_name,
+ 'Y' AS test_active,
+ :RUN_DATE ::TIMESTAMP AS last_auto_gen_date,
+ :AS_OF_DATE ::TIMESTAMP AS profiling_as_of_date,
+ s.profile_run_id,
+ 'Value' AS history_calculation,
+ 1 AS history_lookback,
+ s.fingerprint AS custom_query
+FROM selected_tables s
+ -- Only insert if test type is active
+WHERE EXISTS (SELECT 1 FROM test_types WHERE test_type = 'Table_Freshness' AND active = 'Y')
+ -- Only insert if test type is included in generation set
+ AND EXISTS (SELECT 1 FROM generation_sets WHERE test_type = 'Table_Freshness' AND generation_set = :GENERATION_SET)
+
+-- Match "uix_td_autogen_table" unique index exactly
+ON CONFLICT (test_suite_id, test_type, schema_name, table_name)
+WHERE last_auto_gen_date IS NOT NULL
+ AND table_name IS NOT NULL
+ AND column_name IS NULL
+
+-- Update tests if they already exist
+DO UPDATE SET
+ test_active = EXCLUDED.test_active,
+ last_auto_gen_date = EXCLUDED.last_auto_gen_date,
+ profiling_as_of_date = EXCLUDED.profiling_as_of_date,
+ profile_run_id = EXCLUDED.profile_run_id,
+ history_calculation = EXCLUDED.history_calculation,
+ history_lookback = EXCLUDED.history_lookback,
+ custom_query = EXCLUDED.custom_query
+-- Ignore locked tests
+WHERE test_definitions.lock_refresh = 'N' AND NOT COALESCE(:INSERT_ONLY, FALSE);
diff --git a/testgen/template/flavors/oracle/profiling/project_profiling_query.sql b/testgen/template/flavors/oracle/profiling/project_profiling_query.sql
new file mode 100644
index 00000000..b27953b9
--- /dev/null
+++ b/testgen/template/flavors/oracle/profiling/project_profiling_query.sql
@@ -0,0 +1,368 @@
+SELECT
+ main.connection_id,
+ main.project_code,
+ main.table_groups_id,
+ main.schema_name,
+ main.run_date,
+ main.table_name,
+ main.position,
+ main.column_name,
+ main.column_type,
+ main.db_data_type,
+ main.general_type,
+ main.record_ct,
+ main.value_ct,
+ main.distinct_value_ct,
+ main.null_value_ct,
+ main.min_length,
+ main.max_length,
+ main.avg_length,
+ main.zero_value_ct,
+ main.distinct_std_value_ct,
+ main.zero_length_ct,
+ main.lead_space_ct,
+ main.quoted_value_ct,
+ main.includes_digit_ct,
+ main.filled_value_ct,
+ main.min_text,
+ main.max_text,
+ main.upper_case_ct,
+ main.lower_case_ct,
+ main.non_alpha_ct,
+ main.non_printing_ct,
+ main.numeric_ct,
+ main.date_ct,
+ main.std_pattern_match,
+-- TG-IF is_type_A
+ patterns.top_patterns,
+-- TG-ELSE
+ NULL AS top_patterns,
+-- TG-ENDIF
+ main.min_value,
+ main.min_value_over_0,
+ main.max_value,
+ main.avg_value,
+ main.stdev_value,
+ main.percentile_25,
+ main.percentile_50,
+ main.percentile_75,
+ main.fractional_sum,
+ main.min_date,
+ main.max_date,
+ main.before_1yr_date_ct,
+ main.before_5yr_date_ct,
+ main.before_20yr_date_ct,
+ main.before_100yr_date_ct,
+ main.within_1yr_date_ct,
+ main.within_1mo_date_ct,
+ main.future_date_ct,
+ main.distant_future_date_ct,
+ main.date_days_present,
+ main.date_weeks_present,
+ main.date_months_present,
+ main.boolean_true_ct,
+-- TG-IF is_type_A
+ patterns.distinct_pattern_ct,
+-- TG-ELSE
+ NULL AS distinct_pattern_ct,
+-- TG-ENDIF
+ main.embedded_space_ct,
+ main.avg_embedded_spaces,
+ main.profile_run_id
+FROM (
+ SELECT
+ {CONNECTION_ID} AS connection_id,
+ '{PROJECT_CODE}' AS project_code,
+ '{TABLE_GROUPS_ID}' AS table_groups_id,
+ '{DATA_SCHEMA}' AS schema_name,
+ '{RUN_DATE}' AS run_date,
+ '{DATA_TABLE}' AS table_name,
+ {COL_POS} AS position,
+ '{COL_NAME_SANITIZED}' AS column_name,
+ '{COL_TYPE}' AS column_type,
+ '{DB_DATA_TYPE}' AS db_data_type,
+ '{COL_GEN_TYPE}' AS general_type,
+ COUNT(*) AS record_ct,
+-- TG-IF is_type_X
+ COUNT(CASE WHEN "{COL_NAME}" IS NOT NULL THEN 1 END) AS value_ct,
+ NULL AS distinct_value_ct,
+-- TG-ELSE
+ COUNT("{COL_NAME}") AS value_ct,
+ COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
+-- TG-ENDIF
+ SUM(CASE WHEN "{COL_NAME}" IS NULL THEN 1 ELSE 0 END) AS null_value_ct,
+-- TG-IF is_type_ADN
+ MIN(LENGTH(TO_CHAR("{COL_NAME}"))) AS min_length,
+ MAX(LENGTH(TO_CHAR("{COL_NAME}"))) AS max_length,
+ AVG(NULLIF(LENGTH(TO_CHAR("{COL_NAME}")), 0)) AS avg_length,
+-- TG-ELSE
+ NULL AS min_length,
+ NULL AS max_length,
+ NULL AS avg_length,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ SUM(CASE
+ WHEN REGEXP_LIKE(TRIM("{COL_NAME}"), '^0(\.0*)?$') THEN 1 ELSE 0
+ END) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ SUM(1 - ABS(SIGN("{COL_NAME}"))) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_not_A_not_N
+ NULL AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", 'X '',.-', 'X'))) AS distinct_std_value_ct,
+ 0 AS zero_length_ct,
+ SUM(CASE
+ WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1
+ ELSE 0
+ END) AS lead_space_ct,
+ SUM(CASE WHEN "{COL_NAME}" LIKE '"%"' OR "{COL_NAME}" LIKE '''%''' THEN 1 ELSE 0 END) AS quoted_value_ct,
+ SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}", '[0-9]') THEN 1 ELSE 0 END) AS includes_digit_ct,
+ SUM(CASE
+ WHEN REGEXP_LIKE(LOWER("{COL_NAME}"), '^(\.{1,}|-{1,}|\?{1,}|[[:space:]]{1,}|0{2,}|9{2,}|x{2,}|z{2,})$') THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
+ 'n/a','#na','none','null','unknown') THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
+ '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
+ '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
+ ELSE 0
+ END) AS filled_value_ct,
+ SUBSTR(MIN(CASE WHEN "{COL_NAME}" IS NOT NULL THEN "{COL_NAME}" END), 1, 100) AS min_text,
+ SUBSTR(MAX(CASE WHEN "{COL_NAME}" IS NOT NULL THEN "{COL_NAME}" END), 1, 100) AS max_text,
+ SUM(CASE
+ WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = "{COL_NAME}" THEN 0
+ WHEN TRANSLATE("{COL_NAME}", 'abcdefghijklmnopqrstuvwxyz', ' ') = "{COL_NAME}" THEN 1
+ ELSE 0
+ END) AS upper_case_ct,
+ SUM(CASE
+ WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = "{COL_NAME}" THEN 0
+ WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', ' ') = "{COL_NAME}" THEN 1
+ ELSE 0
+ END) AS lower_case_ct,
+ SUM(CASE
+ WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = "{COL_NAME}" THEN 1
+ ELSE 0
+ END) AS non_alpha_ct,
+ COUNT(CASE WHEN TRANSLATE("{COL_NAME}",
+ 'X' || UNISTR('\00A0') || UNISTR('\2009') || UNISTR('\200B') || UNISTR('\200C') || UNISTR('\200D') || UNISTR('\200E') || UNISTR('\200F') || UNISTR('\202F') || UNISTR('\3000') || UNISTR('\FEFF'),
+ 'XXXXXXXXXXX') <> "{COL_NAME}" THEN 1 END) AS non_printing_ct,
+ SUM(<%IS_NUM;SUBSTR("{COL_NAME}", 1, 31)%>) AS numeric_ct,
+ SUM(<%IS_DATE;SUBSTR("{COL_NAME}", 1, 26)%>) AS date_ct,
+ CASE
+ WHEN SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}", '^[0-9]{1,5}[a-zA-Z]?[[:space:]][[:alnum:]_]{1,5}\.?[[:space:]]?[[:alnum:]_]*[[:space:]]?[[:alnum:]_]*[[:space:]][a-zA-Z]{1,6}\.?[[:space:]]?[0-9]{0,5}[A-Z]?$')
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.8 THEN 'STREET_ADDR'
+ WHEN SUM(CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.9 THEN 'STATE_USA'
+ WHEN SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}", '^(\+1|1)?[ .-]?(\([2-9][0-9]{2}\)|[2-9][0-9]{2})[ .-]?[2-9][0-9]{2}[ .-]?[0-9]{4}$')
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.8 THEN 'PHONE_USA'
+ WHEN SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}", '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$')
+ AND "{COL_NAME}" NOT LIKE '%://%'
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.9 THEN 'EMAIL'
+ WHEN SUM(CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.9 THEN 'ZIP_USA'
+ WHEN SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}", '^[[:alnum:]_[:space:]-]+\.(txt|csv|tsv|dat|doc|pdf|xlsx)$')
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.9 THEN 'FILE_NAME'
+ WHEN SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}", '^([0-9]{4}[- ]?){3}[0-9]{4}$')
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.8 THEN 'CREDIT_CARD'
+ WHEN SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}", '^([^,|' || CHR(9) || ']{1,20}[,|' || CHR(9) || ']){2,}[^,|' || CHR(9) || ']{0,20}([,|' || CHR(9) || ']?[^,|' || CHR(9) || ']{0,20})*$')
+ AND NOT REGEXP_LIKE("{COL_NAME}", '[[:space:]](and|but|or|yet)[[:space:]]')
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.8 THEN 'DELIMITED_DATA'
+ WHEN SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}", '^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$')
+ AND SUBSTR("{COL_NAME}", 1, 3) NOT BETWEEN '734' AND '749'
+ AND SUBSTR("{COL_NAME}", 1, 3) <> '666' THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.9 THEN 'SSN'
+ END AS std_pattern_match,
+-- TG-ELSE
+ NULL AS distinct_std_value_ct,
+ NULL AS zero_length_ct,
+ NULL AS lead_space_ct,
+ NULL AS quoted_value_ct,
+ NULL AS includes_digit_ct,
+ NULL AS filled_value_ct,
+ NULL AS min_text,
+ NULL AS max_text,
+ NULL AS upper_case_ct,
+ NULL AS lower_case_ct,
+ NULL AS non_alpha_ct,
+ NULL AS non_printing_ct,
+ NULL AS numeric_ct,
+ NULL AS date_ct,
+ NULL AS std_pattern_match,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ MIN("{COL_NAME}") AS min_value,
+ MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
+ MAX("{COL_NAME}") AS max_value,
+ AVG(CAST("{COL_NAME}" AS NUMBER)) AS avg_value,
+ STDDEV(CAST("{COL_NAME}" AS NUMBER)) AS stdev_value,
+ MIN(pct_25) AS percentile_25,
+ MIN(pct_50) AS percentile_50,
+ MIN(pct_75) AS percentile_75,
+-- TG-ELSE
+ NULL AS min_value,
+ NULL AS min_value_over_0,
+ NULL AS max_value,
+ NULL AS avg_value,
+ NULL AS stdev_value,
+ NULL AS percentile_25,
+ NULL AS percentile_50,
+ NULL AS percentile_75,
+-- TG-ENDIF
+-- TG-IF is_N_decimal
+ SUM(ROUND(ABS(MOD("{COL_NAME}", 1)), 5)) AS fractional_sum,
+-- TG-ELSE
+ NULL AS fractional_sum,
+-- TG-ENDIF
+-- TG-IF is_type_D
+ CASE
+ WHEN MIN("{COL_NAME}") IS NULL THEN NULL
+ ELSE GREATEST(MIN("{COL_NAME}"), TO_DATE('0001-01-01', 'YYYY-MM-DD'))
+ END AS min_date,
+ MAX("{COL_NAME}") AS max_date,
+ SUM(CASE
+ WHEN <%DATEDIFF_MONTH;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> > 12 THEN 1
+ ELSE 0
+ END) AS before_1yr_date_ct,
+ SUM(CASE
+ WHEN <%DATEDIFF_MONTH;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> > 60 THEN 1
+ ELSE 0
+ END) AS before_5yr_date_ct,
+ SUM(CASE
+ WHEN <%DATEDIFF_MONTH;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> > 240 THEN 1
+ ELSE 0
+ END) AS before_20yr_date_ct,
+ SUM(CASE
+ WHEN <%DATEDIFF_MONTH;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> > 1200 THEN 1
+ ELSE 0
+ END) AS before_100yr_date_ct,
+ SUM(CASE
+ WHEN <%DATEDIFF_DAY;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> BETWEEN 0 AND 365 THEN 1
+ ELSE 0
+ END) AS within_1yr_date_ct,
+ SUM(CASE
+ WHEN <%DATEDIFF_DAY;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> BETWEEN 0 AND 30 THEN 1
+ ELSE 0
+ END) AS within_1mo_date_ct,
+ SUM(CASE
+ WHEN "{COL_NAME}" > TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS') THEN 1 ELSE 0
+ END) AS future_date_ct,
+ SUM(CASE
+ WHEN <%DATEDIFF_MONTH;TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS');"{COL_NAME}"%> > 240 THEN 1
+ ELSE 0
+ END) AS distant_future_date_ct,
+ COUNT(DISTINCT <%DATEDIFF_DAY;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>) AS date_days_present,
+ COUNT(DISTINCT <%DATEDIFF_WEEK;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>) AS date_weeks_present,
+ COUNT(DISTINCT <%DATEDIFF_MONTH;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>) AS date_months_present,
+-- TG-ELSE
+ NULL AS min_date,
+ NULL AS max_date,
+ NULL AS before_1yr_date_ct,
+ NULL AS before_5yr_date_ct,
+ NULL AS before_20yr_date_ct,
+ NULL AS before_100yr_date_ct,
+ NULL AS within_1yr_date_ct,
+ NULL AS within_1mo_date_ct,
+ NULL AS future_date_ct,
+ NULL AS distant_future_date_ct,
+ NULL AS date_days_present,
+ NULL AS date_weeks_present,
+ NULL AS date_months_present,
+-- TG-ENDIF
+-- TG-IF is_type_B
+ SUM(CAST("{COL_NAME}" AS NUMBER)) AS boolean_true_ct,
+-- TG-ELSE
+ NULL AS boolean_true_ct,
+-- TG-ENDIF
+-- TG-IF is_A_sampling
+ SUM(SIGN(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REPLACE(TRIM("{COL_NAME}"), ' ', '')))) AS embedded_space_ct,
+ AVG(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REPLACE(TRIM("{COL_NAME}"), ' ', ''))) AS avg_embedded_spaces,
+-- TG-ENDIF
+-- TG-IF is_A_no_sampling
+ SUM(SIGN(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REPLACE(TRIM("{COL_NAME}"), ' ', '')))) AS embedded_space_ct,
+ AVG(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REPLACE(TRIM("{COL_NAME}"), ' ', ''))) AS avg_embedded_spaces,
+-- TG-ENDIF
+-- TG-IF is_not_A
+ NULL AS embedded_space_ct,
+ NULL AS avg_embedded_spaces,
+-- TG-ENDIF
+ '{PROFILE_RUN_ID}' AS profile_run_id
+-- TG-IF do_sample
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" SAMPLE ({SAMPLE_PERCENT_CALC})
+-- TG-ELSE
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
+-- TG-ENDIF
+-- TG-IF is_N_sampling
+ , (SELECT
+ PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_25,
+ PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_50,
+ PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_75
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" SAMPLE ({SAMPLE_PERCENT_CALC}) WHERE ROWNUM <= 1000000) pctile
+-- TG-ENDIF
+-- TG-IF is_N_no_sampling
+ , (SELECT
+ PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_25,
+ PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_50,
+ PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_75
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" WHERE ROWNUM <= 1000000) pctile
+-- TG-ENDIF
+) main
+-- TG-IF is_A_sampling
+CROSS JOIN (
+ SELECT
+ (SELECT SUBSTR(LISTAGG(formatted_pattern, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1, 1000)
+ FROM (
+ SELECT TO_CHAR(COUNT(*)) || ' | ' || pattern AS formatted_pattern,
+ COUNT(*) AS ct
+ FROM (SELECT REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
+ "{COL_NAME}", '[a-z]', 'a'),
+ '[A-Z]', 'A'),
+ '[0-9]', 'N') AS pattern
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" SAMPLE ({SAMPLE_PERCENT_CALC})
+ WHERE "{COL_NAME}" IS NOT NULL AND "{COL_NAME}" > ' ' AND (SELECT MAX(LENGTH("{COL_NAME}"))
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" SAMPLE ({SAMPLE_PERCENT_CALC})) BETWEEN 3 and {MAX_PATTERN_LENGTH}) p
+ GROUP BY pattern
+ HAVING pattern > ' '
+ ORDER BY COUNT(*) DESC
+ FETCH FIRST 5 ROWS ONLY
+ ) ps) AS top_patterns,
+ (SELECT COUNT(DISTINCT REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
+ "{COL_NAME}", '[a-z]', 'a'),
+ '[A-Z]', 'A'),
+ '[0-9]', 'N')
+ )
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" SAMPLE ({SAMPLE_PERCENT_CALC})
+ WHERE "{COL_NAME}" IS NOT NULL AND "{COL_NAME}" > ' ') AS distinct_pattern_ct
+ FROM DUAL
+) patterns
+-- TG-ENDIF
+-- TG-IF is_A_no_sampling
+CROSS JOIN (
+ SELECT
+ (SELECT SUBSTR(LISTAGG(formatted_pattern, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1, 1000)
+ FROM (
+ SELECT TO_CHAR(COUNT(*)) || ' | ' || pattern AS formatted_pattern,
+ COUNT(*) AS ct
+ FROM (SELECT REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
+ "{COL_NAME}", '[a-z]', 'a'),
+ '[A-Z]', 'A'),
+ '[0-9]', 'N') AS pattern
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
+ WHERE "{COL_NAME}" IS NOT NULL AND "{COL_NAME}" > ' ' AND (SELECT MAX(LENGTH("{COL_NAME}"))
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}") BETWEEN 3 and {MAX_PATTERN_LENGTH}) p
+ GROUP BY pattern
+ HAVING pattern > ' '
+ ORDER BY COUNT(*) DESC
+ FETCH FIRST 5 ROWS ONLY
+ ) ps) AS top_patterns,
+ (SELECT COUNT(DISTINCT REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
+ "{COL_NAME}", '[a-z]', 'a'),
+ '[A-Z]', 'A'),
+ '[0-9]', 'N')
+ )
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
+ WHERE "{COL_NAME}" IS NOT NULL AND "{COL_NAME}" > ' ') AS distinct_pattern_ct
+ FROM DUAL
+) patterns
+-- TG-ENDIF
diff --git a/testgen/template/flavors/oracle/profiling/project_secondary_profiling_query.sql b/testgen/template/flavors/oracle/profiling/project_secondary_profiling_query.sql
new file mode 100644
index 00000000..4e67b07d
--- /dev/null
+++ b/testgen/template/flavors/oracle/profiling/project_secondary_profiling_query.sql
@@ -0,0 +1,41 @@
+-- Get Freqs for selected columns
+WITH ranked_vals AS (
+ SELECT "{COL_NAME}",
+ COUNT(*) AS ct,
+ ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC, "{COL_NAME}") AS rn
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
+-- TG-IF do_sample_bool
+ SAMPLE ({SAMPLE_PERCENT_CALC})
+-- TG-ENDIF
+ WHERE "{COL_NAME}" IS NOT NULL AND "{COL_NAME}" > ' '
+ GROUP BY "{COL_NAME}"
+),
+consol_vals AS (
+ SELECT COALESCE(CASE WHEN rn <= 10 THEN '| ' || "{COL_NAME}" || ' | ' || TO_CHAR(ct)
+ ELSE NULL
+ END, '| Other Values (' || TO_CHAR(COUNT(DISTINCT "{COL_NAME}")) || ') | ' || TO_CHAR(SUM(ct))) AS val,
+ MIN(rn) as min_rn
+ FROM ranked_vals
+ GROUP BY CASE WHEN rn <= 10 THEN '| ' || "{COL_NAME}" || ' | ' || TO_CHAR(ct)
+ ELSE NULL
+ END
+),
+hash_val AS (
+ SELECT RAWTOHEX(STANDARD_HASH(LISTAGG("{COL_NAME}", '|') WITHIN GROUP (ORDER BY "{COL_NAME}"), 'MD5')) as hash_result
+ FROM (SELECT DISTINCT "{COL_NAME}"
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
+-- TG-IF do_sample_bool
+ SAMPLE ({SAMPLE_PERCENT_CALC})
+-- TG-ENDIF
+ WHERE "{COL_NAME}" IS NOT NULL AND "{COL_NAME}" > ' ')
+)
+SELECT '{PROJECT_CODE}' as project_code,
+ '{DATA_SCHEMA}' as schema_name,
+ '{RUN_DATE}' as run_date,
+ '{DATA_TABLE}' as table_name,
+ '{COL_NAME}' as column_name,
+ REPLACE(LISTAGG(val, '^#^') WITHIN GROUP (ORDER BY min_rn), '^#^', CHR(10)) AS top_freq_values,
+ MAX(h.hash_result) as distinct_value_hash
+ FROM consol_vals
+ CROSS JOIN hash_val h
+ GROUP BY h.hash_result
diff --git a/testgen/template/flavors/oracle/profiling/templated_functions.yaml b/testgen/template/flavors/oracle/profiling/templated_functions.yaml
new file mode 100644
index 00000000..c2febea1
--- /dev/null
+++ b/testgen/template/flavors/oracle/profiling/templated_functions.yaml
@@ -0,0 +1,108 @@
+DATEDIFF_DAY: TRUNC({$2}) - TRUNC({$1})
+
+DATEDIFF_WEEK: (TRUNC({$2}, 'IW') - TRUNC({$1}, 'IW')) / 7
+
+DATEDIFF_MONTH: FLOOR(MONTHS_BETWEEN(TRUNC({$2}, 'MM'), TRUNC({$1}, 'MM')))
+
+DATEDIFF_QUARTER: FLOOR(MONTHS_BETWEEN(TRUNC({$2}, 'MM'), TRUNC({$1}, 'MM')) / 3)
+
+DATEDIFF_YEAR: EXTRACT(YEAR FROM {$2}) - EXTRACT(YEAR FROM {$1})
+
+IS_NUM: CASE
+ WHEN REGEXP_LIKE({$1}, '^[[:space:]]*[+-]?\$?[[:space:]]*[0-9]+(,[0-9]{3})*(\.[0-9]*)?[%]?[[:space:]]*$') THEN 1
+ ELSE 0
+ END
+
+IS_DATE: CASE
+ /* YYYY-MM-DD HH:MM:SS SSSSSS or YYYY-MM-DD HH:MM:SS */
+ WHEN REGEXP_LIKE({$1}, '^([0-9]{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])[[:space:]](2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])([[:space:]][0-9]{6})?$')
+ THEN CASE
+ WHEN TO_NUMBER(SUBSTR({$1}, 1, 4)) BETWEEN 1800 AND 2200
+ AND (
+ ( SUBSTR({$1}, 6, 2) IN ('01', '03', '05', '07', '08',
+ '10', '12')
+ AND TO_NUMBER(SUBSTR({$1}, 9, 2)) BETWEEN 1 AND 31 )
+ OR ( SUBSTR({$1}, 6, 2) IN ('04', '06', '09')
+ AND TO_NUMBER(SUBSTR({$1}, 9, 2)) BETWEEN 1 AND 30 )
+ OR ( SUBSTR({$1}, 6, 2) = '02'
+ AND TO_NUMBER(SUBSTR({$1}, 9, 2)) BETWEEN 1 AND 29)
+ )
+ THEN 1
+ ELSE 0
+ END
+    /* YYYYMMDDHHMMSSSSSS or YYYYMMDDHH */
+ WHEN REGEXP_LIKE({$1}, '^([0-9]{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])([0-5][0-9])([0-5][0-9])([0-9]{6})$')
+ OR REGEXP_LIKE({$1}, '^([0-9]{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])$')
+ THEN CASE
+ WHEN TO_NUMBER(SUBSTR({$1}, 1, 4)) BETWEEN 1800 AND 2200
+ AND (
+ ( SUBSTR({$1}, 5, 2) IN ('01', '03', '05', '07', '08',
+ '10', '12')
+ AND TO_NUMBER(SUBSTR({$1}, 7, 2)) BETWEEN 1 AND 31 )
+ OR ( SUBSTR({$1}, 5, 2) IN ('04', '06', '09')
+ AND TO_NUMBER(SUBSTR({$1}, 7, 2)) BETWEEN 1 AND 30 )
+ OR ( SUBSTR({$1}, 5, 2) = '02'
+ AND TO_NUMBER(SUBSTR({$1}, 7, 2)) BETWEEN 1 AND 29)
+ )
+ THEN 1
+ ELSE 0
+ END
+ /* Exclude anything else long */
+ WHEN LENGTH({$1}) > 11 THEN 0
+ /* YYYY-MMM/MM-DD */
+ WHEN REGEXP_LIKE(REGEXP_REPLACE(UPPER({$1}), '(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)', '12'),
+ '[12][09][0-9][0-9]-[0-1]?[0-9]-[0-3]?[0-9]')
+ THEN CASE
+ WHEN TO_NUMBER(REGEXP_SUBSTR({$1}, '^[^-]+')) BETWEEN 1800 AND 2200
+ AND (
+ ( UPPER(REGEXP_SUBSTR({$1}, '[^-]+', 1, 2)) IN ('01', '03', '05', '07', '08',
+ '1', '3', '5', '7', '8', '10', '12',
+ 'JAN', 'MAR', 'MAY', 'JUL', 'AUG',
+ 'OCT', 'DEC')
+ AND TO_NUMBER(REGEXP_SUBSTR({$1}, '[^-]+$')) BETWEEN 1 AND 31 )
+ OR ( UPPER(REGEXP_SUBSTR({$1}, '[^-]+', 1, 2)) IN ('04', '06', '09', '4', '6', '9', '11',
+ 'APR', 'JUN', 'SEP', 'NOV')
+ AND TO_NUMBER(REGEXP_SUBSTR({$1}, '[^-]+$')) BETWEEN 1 AND 30 )
+ OR ( UPPER(REGEXP_SUBSTR({$1}, '[^-]+', 1, 2)) IN ('02', '2', 'FEB')
+ AND TO_NUMBER(REGEXP_SUBSTR({$1}, '[^-]+$')) BETWEEN 1 AND 29)
+ )
+ THEN 1
+ ELSE 0
+ END
+ /* MM/-DD/-YY/YYYY */
+ WHEN REGEXP_LIKE(REPLACE({$1}, '-', '/'), '^[0-1]?[0-9]/[0-3]?[0-9]/[12][09][0-9][0-9]$')
+ OR REGEXP_LIKE(REPLACE({$1}, '-', '/'), '^[0-1]?[0-9]/[0-3]?[0-9]/[0-9][0-9]$')
+ THEN
+ CASE
+ WHEN TO_NUMBER(REGEXP_SUBSTR(REPLACE({$1}, '-', '/'), '[^/]+', 1, 1)) BETWEEN 1 AND 12
+ AND (
+ ( TO_NUMBER(REGEXP_SUBSTR(REPLACE({$1}, '-', '/'), '[^/]+', 1, 1)) IN (1, 3, 5, 7, 8, 10, 12)
+ AND TO_NUMBER(REGEXP_SUBSTR(REPLACE({$1}, '-', '/'), '[^/]+', 1, 2)) BETWEEN 1 AND 31 )
+ OR ( TO_NUMBER(REGEXP_SUBSTR(REPLACE({$1}, '-', '/'), '[^/]+', 1, 1)) IN (4, 6, 9, 11)
+ AND TO_NUMBER(REGEXP_SUBSTR(REPLACE({$1}, '-', '/'), '[^/]+', 1, 2)) BETWEEN 1 AND 30 )
+ OR ( TO_NUMBER(REGEXP_SUBSTR(REPLACE({$1}, '-', '/'), '[^/]+', 1, 1)) = 2
+ AND TO_NUMBER(REGEXP_SUBSTR(REPLACE({$1}, '-', '/'), '[^/]+', 1, 2)) BETWEEN 1 AND 29)
+ )
+ AND
+ TO_NUMBER('20' || SUBSTR(REGEXP_SUBSTR(REPLACE({$1}, '-', '/'), '[^/]+$'), -2)) BETWEEN 1800 AND 2200
+ THEN 1
+ ELSE 0
+ END
+ /* DD-MMM-YYYY */
+ WHEN REGEXP_LIKE(UPPER({$1}), '[0-3]?[0-9]-(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-[12][09][0-9][0-9]')
+ THEN
+ CASE
+ WHEN TO_NUMBER(REGEXP_SUBSTR({$1}, '[^-]+$')) BETWEEN 1800 AND 2200
+ AND (
+ ( UPPER(REGEXP_SUBSTR({$1}, '[^-]+', 1, 2)) IN ('JAN', 'MAR', 'MAY', 'JUL', 'AUG', 'OCT', 'DEC')
+ AND TO_NUMBER(REGEXP_SUBSTR({$1}, '^[^-]+')) BETWEEN 1 AND 31 )
+ OR ( UPPER(REGEXP_SUBSTR({$1}, '[^-]+', 1, 2)) IN ('APR', 'JUN', 'SEP', 'NOV')
+ AND TO_NUMBER(REGEXP_SUBSTR({$1}, '^[^-]+')) BETWEEN 1 AND 30 )
+ OR ( UPPER(REGEXP_SUBSTR({$1}, '[^-]+', 1, 2)) = 'FEB'
+ AND TO_NUMBER(REGEXP_SUBSTR({$1}, '^[^-]+')) BETWEEN 1 AND 29)
+ )
+ THEN 1
+ ELSE 0
+ END
+ ELSE 0
+ END
diff --git a/testgen/template/flavors/oracle/validate_tests/get_target_identifiers.sql b/testgen/template/flavors/oracle/validate_tests/get_target_identifiers.sql
new file mode 100644
index 00000000..2685239c
--- /dev/null
+++ b/testgen/template/flavors/oracle/validate_tests/get_target_identifiers.sql
@@ -0,0 +1,5 @@
+SELECT owner AS schema_name,
+ table_name,
+ column_name
+FROM all_tab_columns
+WHERE owner IN ({TEST_SCHEMAS})
diff --git a/testgen/template/flavors/postgresql/profiling/project_profiling_query.yaml b/testgen/template/flavors/postgresql/profiling/project_profiling_query.sql
similarity index 66%
rename from testgen/template/flavors/postgresql/profiling/project_profiling_query.yaml
rename to testgen/template/flavors/postgresql/profiling/project_profiling_query.sql
index 67156d77..b2cb78bf 100644
--- a/testgen/template/flavors/postgresql/profiling/project_profiling_query.yaml
+++ b/testgen/template/flavors/postgresql/profiling/project_profiling_query.sql
@@ -1,18 +1,14 @@
----
-01_sampling: |
- WITH target_table AS (
- SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}" TABLESAMPLE BERNOULLI ({SAMPLE_PERCENT_CALC}) REPEATABLE (64)
- )
- SELECT
-01_else: |
- WITH target_table AS (
- SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
- )
- SELECT
-01_all: |
- {CONNECTION_ID} as connection_id,
- '{PROJECT_CODE}' as project_code,
- '{TABLE_GROUPS_ID}' as table_groups_id,
+WITH target_table AS (
+-- TG-IF do_sample
+ SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}" TABLESAMPLE BERNOULLI ({SAMPLE_PERCENT_CALC}) REPEATABLE (64)
+-- TG-ELSE
+ SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
+-- TG-ENDIF
+)
+SELECT
+ {CONNECTION_ID} AS connection_id,
+ '{PROJECT_CODE}' AS project_code,
+ '{TABLE_GROUPS_ID}' AS table_groups_id,
'{DATA_SCHEMA}' AS schema_name,
'{RUN_DATE}' AS run_date,
'{DATA_TABLE}' AS table_name,
@@ -22,43 +18,43 @@
'{DB_DATA_TYPE}' AS db_data_type,
'{COL_GEN_TYPE}' AS general_type,
COUNT(*) AS record_ct,
-
-02_X: |
COUNT("{COL_NAME}") AS value_ct,
COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
SUM(CASE WHEN "{COL_NAME}" IS NULL THEN 1 ELSE 0 END) AS null_value_ct,
-02_else: |
- COUNT("{COL_NAME}") AS value_ct,
- COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
- SUM(CASE WHEN "{COL_NAME}" IS NULL THEN 1 ELSE 0 END) AS null_value_ct,
-
-03_ADN: MIN(LENGTH(CAST("{COL_NAME}" AS TEXT))) AS min_length,
- MAX(LENGTH(CAST("{COL_NAME}" AS TEXT))) AS max_length,
+-- TG-IF is_type_ADN
+ MIN(LENGTH(CAST("{COL_NAME}" AS TEXT))) AS min_length,
+ MAX(LENGTH(CAST("{COL_NAME}" AS TEXT))) AS max_length,
AVG(NULLIF(LENGTH(CAST("{COL_NAME}" AS TEXT)), 0)::FLOAT) AS avg_length,
-03_else: NULL as min_length,
- NULL as max_length,
- NULL as avg_length,
-
-04_A: SUM(CASE
+-- TG-ELSE
+ NULL AS min_length,
+ NULL AS max_length,
+ NULL AS avg_length,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ SUM(CASE
WHEN TRIM("{COL_NAME}") ~ '^0(\.0*)?$' THEN 1 ELSE 0
- END) AS zero_value_ct,
-04_N: SUM( 1 - ABS(SIGN("{COL_NAME}"::NUMERIC)) )::BIGINT AS zero_value_ct,
-04_else: NULL as zero_value_ct,
-
-05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) as distinct_std_value_ct,
+ END) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ SUM( 1 - ABS(SIGN("{COL_NAME}"::NUMERIC)) )::BIGINT AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_not_A_not_N
+ NULL AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) AS distinct_std_value_ct,
SUM(CASE
WHEN "{COL_NAME}" = '' THEN 1
ELSE 0
- END) AS zero_length_ct,
+ END) AS zero_length_ct,
SUM( CASE
WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1
ELSE 0
- END ) AS lead_space_ct,
- SUM( CASE WHEN "{COL_NAME}" ILIKE '"%"' OR "{COL_NAME}" ILIKE '''%''' THEN 1 ELSE 0 END ) as quoted_value_ct,
- SUM( CASE WHEN "{COL_NAME}" ~ '[0-9]' THEN 1 ELSE 0 END ) as includes_digit_ct,
+ END ) AS lead_space_ct,
+ SUM( CASE WHEN "{COL_NAME}" ILIKE '"%"' OR "{COL_NAME}" ILIKE '''%''' THEN 1 ELSE 0 END ) AS quoted_value_ct,
+ SUM( CASE WHEN "{COL_NAME}" ~ '[0-9]' THEN 1 ELSE 0 END ) AS includes_digit_ct,
SUM( CASE
- WHEN "{COL_NAME}" IN ('.', '?', ' ') THEN 1
- WHEN LOWER("{COL_NAME}") SIMILAR TO '(^.{2,}|-{2,}|0{2,}|9{2,}|x{2,}|z{2,}$)' THEN 1
+ WHEN LOWER("{COL_NAME}") SIMILAR TO '(.{1,}|-{1,}|\?{1,}|[[:space:]]{1,}|0{2,}|9{2,}|x{2,}|z{2,})' THEN 1
WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
'n/a','#na','none','null','unknown') THEN 1
WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
@@ -66,9 +62,9 @@
WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
'[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
ELSE 0
- END ) AS filled_value_ct,
- LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text,
- LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text,
+ END ) AS filled_value_ct,
+ LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text,
+ LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text,
SUM(CASE
WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 0
WHEN TRANSLATE("{COL_NAME}", 'abcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 1
@@ -83,9 +79,9 @@
WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 1
ELSE 0
END) AS non_alpha_ct,
- COUNT( CASE WHEN TRANSLATE("{COL_NAME}", E'\u00a0\u2009\u200b\u200c\u200d\u200e\u200f\u202f\u3000\ufeff', 'XXXXXXXXXX') <> "{COL_NAME}" THEN 1 END) as non_printing_ct,
- SUM(<%IS_NUM;LEFT("{COL_NAME}", 31)%>) AS numeric_ct,
- SUM(<%IS_DATE;LEFT("{COL_NAME}", 26)%>) AS date_ct,
+ COUNT( CASE WHEN TRANSLATE("{COL_NAME}", E'\u00a0\u2009\u200b\u200c\u200d\u200e\u200f\u202f\u3000\ufeff', 'XXXXXXXXXX') <> "{COL_NAME}" THEN 1 END) AS non_printing_ct,
+ SUM(<%IS_NUM;LEFT("{COL_NAME}", 31)%>) AS numeric_ct,
+ SUM(<%IS_DATE;LEFT("{COL_NAME}", 26)%>) AS date_ct,
CASE
WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[0-9]{1,5}[a-zA-Z]?\s\w{1,5}\.?\s?\w*\s?\w*\s[a-zA-Z]{1,6}\.?\s?[0-9]{0,5}[A-Z]{0,1}$'
THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'STREET_ADDR'
@@ -99,7 +95,7 @@
THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'ZIP_USA'
WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[\w\s\-]+(? 0.9 THEN 'FILE_NAME'
- WHEN SUM( CASE WHEN "{COL_NAME}" SIMILAR TO '^([0-9]{4}[- ]){3}[0-9]{4}$'
+ WHEN SUM( CASE WHEN "{COL_NAME}" SIMILAR TO '^([0-9]{4}[- ]?){3}[0-9]{4}$'
THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'CREDIT_CARD'
WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'
AND "{COL_NAME}" !~ '\s(and|but|or|yet)\s'
@@ -107,24 +103,26 @@
WHEN SUM ( CASE WHEN "{COL_NAME}" SIMILAR TO '^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$'
AND LEFT("{COL_NAME}", 3) NOT BETWEEN '734' AND '749'
AND LEFT("{COL_NAME}", 3) <> '666' THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'SSN'
- END as std_pattern_match,
-05_else: NULL as distinct_std_value_ct,
- NULL as zero_length_ct,
- NULL as lead_space_ct,
- NULL as quoted_value_ct,
- NULL as includes_digit_ct,
- NULL as filled_value_ct,
- NULL as min_text,
- NULL as max_text,
- NULL as upper_case_ct,
- NULL as lower_case_ct,
- NULL as non_alpha_ct,
- NULL as non_printing_ct,
- NULL as numeric_ct,
- NULL as date_ct,
- NULL as std_pattern_match,
-
-06_A: ( SELECT LEFT(STRING_AGG(pattern, ' | ' ORDER BY ct DESC) , 1000) AS concat_pats
+ END AS std_pattern_match,
+-- TG-ELSE
+ NULL AS distinct_std_value_ct,
+ NULL AS zero_length_ct,
+ NULL AS lead_space_ct,
+ NULL AS quoted_value_ct,
+ NULL AS includes_digit_ct,
+ NULL AS filled_value_ct,
+ NULL AS min_text,
+ NULL AS max_text,
+ NULL AS upper_case_ct,
+ NULL AS lower_case_ct,
+ NULL AS non_alpha_ct,
+ NULL AS non_printing_ct,
+ NULL AS numeric_ct,
+ NULL AS date_ct,
+ NULL AS std_pattern_match,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT LEFT(STRING_AGG(pattern, ' | ' ORDER BY ct DESC) , 1000) AS concat_pats
FROM (
SELECT CAST(COUNT(*) AS VARCHAR(40)) || ' | ' || pattern AS pattern,
COUNT(*) AS ct
@@ -140,33 +138,39 @@
ORDER BY COUNT(*) DESC
LIMIT 5
) ps) AS top_patterns,
-06_else: NULL as top_patterns,
-
-08_N: MIN("{COL_NAME}") AS min_value,
- MIN(CASE WHEN "{COL_NAME}"::NUMERIC > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
- MAX("{COL_NAME}") AS max_value,
- AVG(CAST("{COL_NAME}"::NUMERIC AS FLOAT)) AS avg_value,
- STDDEV(CAST("{COL_NAME}"::NUMERIC AS FLOAT)) AS stdev_value,
- MIN(pct_25) as percentile_25,
- MIN(pct_50) as percentile_50,
- MIN(pct_75) as percentile_75,
-08_else: NULL as min_value,
- NULL as min_value_over_0,
- NULL as max_value,
- NULL as avg_value,
- NULL as stdev_value,
- NULL as percentile_25,
- NULL as percentile_50,
- NULL as percentile_75,
-
-10_N_dec: SUM(ROUND(ABS(MOD("{COL_NAME}", 1)), 5)) as fractional_sum,
-10_else: NULL as fractional_sum,
-
-11_D: CASE
+-- TG-ELSE
+ NULL AS top_patterns,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ MIN("{COL_NAME}") AS min_value,
+ MIN(CASE WHEN "{COL_NAME}"::NUMERIC > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
+ MAX("{COL_NAME}") AS max_value,
+ AVG(CAST("{COL_NAME}"::NUMERIC AS FLOAT)) AS avg_value,
+ STDDEV(CAST("{COL_NAME}"::NUMERIC AS FLOAT)) AS stdev_value,
+ MIN(pct_25) AS percentile_25,
+ MIN(pct_50) AS percentile_50,
+ MIN(pct_75) AS percentile_75,
+-- TG-ELSE
+ NULL AS min_value,
+ NULL AS min_value_over_0,
+ NULL AS max_value,
+ NULL AS avg_value,
+ NULL AS stdev_value,
+ NULL AS percentile_25,
+ NULL AS percentile_50,
+ NULL AS percentile_75,
+-- TG-ENDIF
+-- TG-IF is_N_decimal
+ SUM(ROUND(ABS(MOD("{COL_NAME}", 1)), 5)) AS fractional_sum,
+-- TG-ELSE
+ NULL AS fractional_sum,
+-- TG-ENDIF
+-- TG-IF is_type_D
+ CASE
WHEN MIN("{COL_NAME}") IS NULL THEN NULL
ELSE GREATEST(MIN("{COL_NAME}"), '0001-01-01')
- END as min_date,
- MAX("{COL_NAME}") as max_date,
+ END AS min_date,
+ MAX("{COL_NAME}") AS max_date,
SUM(CASE
WHEN <%DATEDIFF_MONTH;"{COL_NAME}";'{RUN_DATE}'%> > 12 THEN 1
ELSE 0
@@ -198,27 +202,31 @@
WHEN <%DATEDIFF_MONTH;'{RUN_DATE}';"{COL_NAME}"%> > 240 THEN 1
ELSE 0
END) AS distant_future_date_ct,
- COUNT(DISTINCT <%DATEDIFF_DAY;"{COL_NAME}";'{RUN_DATE}'%>) as date_days_present,
- COUNT(DISTINCT <%DATEDIFF_WEEK;"{COL_NAME}";'{RUN_DATE}'%>) as date_weeks_present,
- COUNT(DISTINCT <%DATEDIFF_MONTH;"{COL_NAME}";'{RUN_DATE}'%>) as date_months_present,
-11_else: NULL as min_date,
- NULL as max_date,
- NULL as before_1yr_date_ct,
- NULL as before_5yr_date_ct,
- NULL as before_20yr_date_ct,
+ COUNT(DISTINCT <%DATEDIFF_DAY;"{COL_NAME}";'{RUN_DATE}'%>) AS date_days_present,
+ COUNT(DISTINCT <%DATEDIFF_WEEK;"{COL_NAME}";'{RUN_DATE}'%>) AS date_weeks_present,
+ COUNT(DISTINCT <%DATEDIFF_MONTH;"{COL_NAME}";'{RUN_DATE}'%>) AS date_months_present,
+-- TG-ELSE
+ NULL AS min_date,
+ NULL AS max_date,
+ NULL AS before_1yr_date_ct,
+ NULL AS before_5yr_date_ct,
+ NULL AS before_20yr_date_ct,
NULL AS before_100yr_date_ct,
- NULL as within_1yr_date_ct,
- NULL as within_1mo_date_ct,
- NULL as future_date_ct,
- NULL as distant_future_date_ct,
- NULL as date_days_present,
- NULL as date_weeks_present,
- NULL as date_months_present,
-
-12_B: SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
-12_else: NULL as boolean_true_ct,
-
-14_A: ( SELECT COUNT(DISTINCT REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE(
+ NULL AS within_1yr_date_ct,
+ NULL AS within_1mo_date_ct,
+ NULL AS future_date_ct,
+ NULL AS distant_future_date_ct,
+ NULL AS date_days_present,
+ NULL AS date_weeks_present,
+ NULL AS date_months_present,
+-- TG-ENDIF
+-- TG-IF is_type_B
+ SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
+-- TG-ELSE
+ NULL AS boolean_true_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT COUNT(DISTINCT REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE(
"{COL_NAME}", '[a-z]', 'a', 'g'),
'[A-Z]', 'A', 'g'),
'[0-9]', 'N', 'g')
@@ -226,25 +234,25 @@
FROM target_table
WHERE "{COL_NAME}" > ' ' ) AS distinct_pattern_ct,
SUM(SIGN(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REGEXP_REPLACE(TRIM("{COL_NAME}"), ' ', '', 'g')))::BIGINT) AS embedded_space_ct,
- AVG(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REGEXP_REPLACE(TRIM("{COL_NAME}"), ' ', '', 'g'))::FLOAT) AS avg_embedded_spaces,
-14_else: NULL as distinct_pattern_ct,
- NULL as embedded_space_ct,
- NULL as avg_embedded_spaces,
-
-16_all: " '{PROFILE_RUN_ID}' as profile_run_id"
-
-98_all: ' FROM target_table '
-
-99_N: |
+ AVG(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REGEXP_REPLACE(TRIM("{COL_NAME}"), ' ', '', 'g'))::FLOAT) AS avg_embedded_spaces,
+-- TG-ELSE
+ NULL AS distinct_pattern_ct,
+ NULL AS embedded_space_ct,
+ NULL AS avg_embedded_spaces,
+-- TG-ENDIF
+ '{PROFILE_RUN_ID}' AS profile_run_id
+ FROM target_table
+-- TG-IF is_N_sampling
, (SELECT
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}"::NUMERIC) AS pct_25,
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}"::NUMERIC) AS pct_50,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}"::NUMERIC) AS pct_75
- FROM "{DATA_SCHEMA}"."{DATA_TABLE}" LIMIT 1) pctile
-99_N_sampling: |
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" TABLESAMPLE BERNOULLI ({SAMPLE_PERCENT_CALC}) REPEATABLE (64) LIMIT 1) pctile
+-- TG-ENDIF
+-- TG-IF is_N_no_sampling
, (SELECT
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}"::NUMERIC) AS pct_25,
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}"::NUMERIC) AS pct_50,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}"::NUMERIC) AS pct_75
- FROM "{DATA_SCHEMA}"."{DATA_TABLE}" TABLESAMPLE BERNOULLI ({SAMPLE_PERCENT_CALC}) REPEATABLE (64) LIMIT 1) pctile
-99_else: ' '
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" LIMIT 1) pctile
+-- TG-ENDIF
diff --git a/testgen/template/flavors/postgresql/profiling/templated_functions.yaml b/testgen/template/flavors/postgresql/profiling/templated_functions.yaml
index cf9d8541..b447289d 100644
--- a/testgen/template/flavors/postgresql/profiling/templated_functions.yaml
+++ b/testgen/template/flavors/postgresql/profiling/templated_functions.yaml
@@ -1,6 +1,6 @@
DATEDIFF_DAY: DATE({$2}) - DATE({$1})
-DATEDIFF_WEEK: (DATE({$2}) - DATE({$1})) / 7
+DATEDIFF_WEEK: (DATE_TRUNC('week', DATE({$2}))::DATE - DATE_TRUNC('week', DATE({$1}))::DATE) / 7
DATEDIFF_MONTH: (DATE_PART('year', {$2}::TIMESTAMP) - DATE_PART('year', {$1}::TIMESTAMP)) * 12 + (DATE_PART('month', {$2}::TIMESTAMP) - DATE_PART('month', {$1}::TIMESTAMP))
@@ -106,4 +106,3 @@ IS_DATE: CASE
END
ELSE 0
END
-
diff --git a/testgen/template/flavors/redshift/profiling/project_profiling_query.sql b/testgen/template/flavors/redshift/profiling/project_profiling_query.sql
new file mode 100644
index 00000000..53774bb0
--- /dev/null
+++ b/testgen/template/flavors/redshift/profiling/project_profiling_query.sql
@@ -0,0 +1,206 @@
+WITH target_table AS (
+-- TG-IF do_sample
+ SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}" WHERE RAND() <= 1.0 / {PROFILE_SAMPLE_RATIO}
+-- TG-ELSE
+ SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
+-- TG-ENDIF
+)
+SELECT
+ {CONNECTION_ID} AS connection_id,
+ '{PROJECT_CODE}' AS project_code,
+ '{TABLE_GROUPS_ID}' AS table_groups_id,
+ '{DATA_SCHEMA}' AS schema_name,
+ '{RUN_DATE}' AS run_date,
+ '{DATA_TABLE}' AS table_name,
+ {COL_POS} AS position,
+ '{COL_NAME_SANITIZED}' AS column_name,
+ '{COL_TYPE}' AS column_type,
+ '{DB_DATA_TYPE}' AS db_data_type,
+ '{COL_GEN_TYPE}' AS general_type,
+ COUNT(*) AS record_ct,
+ COUNT("{COL_NAME}") AS value_ct,
+ COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
+ SUM(NVL2("{COL_NAME}", 0, 1)) AS null_value_ct,
+-- TG-IF is_type_ADN
+ MIN(LEN("{COL_NAME}")) AS min_length,
+ MAX(LEN("{COL_NAME}")) AS max_length,
+ AVG(NULLIF(LEN("{COL_NAME}"), 0)::FLOAT) AS avg_length,
+-- TG-ELSE
+ NULL AS min_length,
+ NULL AS max_length,
+ NULL AS avg_length,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ COUNT(CASE WHEN TRIM("{COL_NAME}") ~ '^0(\.0*)?$' THEN 1 END) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ SUM(1 - ABS(SIGN("{COL_NAME}")))::BIGINT AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_not_A_not_N
+ NULL AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) AS distinct_std_value_ct,
+ COUNT(CASE WHEN "{COL_NAME}" = '' THEN 1 END) AS zero_length_ct,
+ COUNT(CASE WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1 END) AS lead_space_ct,
+ COUNT(CASE WHEN "{COL_NAME}" ILIKE '"%"' OR "{COL_NAME}" ILIKE '''%''' THEN 1 END) AS quoted_value_ct,
+ COUNT(CASE WHEN "{COL_NAME}" ~ '[0-9]' THEN 1 END) AS includes_digit_ct,
+ COUNT(CASE
+             WHEN LOWER("{COL_NAME}") SIMILAR TO '(\\.{1,}|-{1,}|\\?{1,}|\\s{1,}|0{2,}|9{2,}|x{2,}|z{2,})' THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
+ 'n/a','#na','none','null','unknown') THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
+ '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
+ '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
+ END) AS filled_value_ct,
+ LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text,
+ LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text,
+ COUNT(CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" <> LOWER("{COL_NAME}") THEN 1 END) AS upper_case_ct,
+ COUNT(CASE WHEN "{COL_NAME}" = LOWER("{COL_NAME}") AND "{COL_NAME}" <> UPPER("{COL_NAME}") THEN 1 END) AS lower_case_ct,
+ COUNT(CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" = LOWER("{COL_NAME}") THEN 1 END) AS non_alpha_ct,
+ COUNT(CASE WHEN TRANSLATE("{COL_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), 'XXXXXXXXXX') <> "{COL_NAME}" THEN 1 END) AS non_printing_ct,
+ SUM(<%IS_NUM;LEFT("{COL_NAME}", 31)%>) AS numeric_ct,
+ SUM(<%IS_DATE;LEFT("{COL_NAME}", 26)%>) AS date_ct,
+ CASE
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$'
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'STREET_ADDR'
+ WHEN SUM(CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'STATE_USA'
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^(\\+1|1)?[ .-]?(\\([2-9][0-9]{2}\\)|[2-9][0-9]{2})[ .-]?[2-9][0-9]{2}[ .-]?[0-9]{4}$'
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'PHONE_USA'
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$'
+ AND "{COL_NAME}" NOT LIKE '%://%'
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'EMAIL'
+ WHEN SUM(CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'ZIP_USA'
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^[\\w\\s\-]+\\.(txt|(c|t|p)sv|dat|doc|docx|json|pdf|xlsx|xml)$'
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'FILE_NAME'
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^([0-9]{4}[- ]?){3}[0-9]{4}$'
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'CREDIT_CARD'
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'
+ AND "{COL_NAME}" !~ '\\s(and|but|or|yet)\\s'
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'DELIMITED_DATA'
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$'
+ AND LEFT("{COL_NAME}", 3) NOT BETWEEN '734' AND '749'
+ AND LEFT("{COL_NAME}", 3) <> '666' THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'SSN'
+ END AS std_pattern_match,
+-- TG-ELSE
+ NULL AS distinct_std_value_ct,
+ NULL AS zero_length_ct,
+ NULL AS lead_space_ct,
+ NULL AS quoted_value_ct,
+ NULL AS includes_digit_ct,
+ NULL AS filled_value_ct,
+ NULL AS min_text,
+ NULL AS max_text,
+ NULL AS upper_case_ct,
+ NULL AS lower_case_ct,
+ NULL AS non_alpha_ct,
+ NULL AS non_printing_ct,
+ NULL AS numeric_ct,
+ NULL AS date_ct,
+ NULL AS std_pattern_match,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT LEFT(LISTAGG(pattern, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1000) AS concat_pats
+ FROM (SELECT TOP 5 CAST(COUNT(*) AS VARCHAR(40)) || ' | ' || pattern AS pattern,
+ COUNT(*) AS ct
+ FROM ( SELECT REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE(
+ "{COL_NAME}", '[a-z]', 'a'),
+ '[A-Z]', 'A'),
+ '[0-9]', 'N') AS pattern
+ FROM target_table
+ WHERE "{COL_NAME}" > ' ' AND (SELECT MAX(LEN("{COL_NAME}"))
+ FROM target_table) BETWEEN 3 and {MAX_PATTERN_LENGTH}) p
+ GROUP BY pattern
+ HAVING pattern > ' '
+ ORDER BY COUNT(*) DESC) AS ps) AS top_patterns,
+-- TG-ELSE
+ NULL AS top_patterns,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ MIN("{COL_NAME}") AS min_value,
+ MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
+ MAX("{COL_NAME}") AS max_value,
+ AVG(CAST("{COL_NAME}" AS FLOAT)) AS avg_value,
+ STDDEV(CAST("{COL_NAME}" AS FLOAT)) AS stdev_value,
+ MIN(pct_25) AS percentile_25,
+ MIN(pct_50) AS percentile_50,
+ MIN(pct_75) AS percentile_75,
+-- TG-ELSE
+ NULL AS min_value,
+ NULL AS min_value_over_0,
+ NULL AS max_value,
+ NULL AS avg_value,
+ NULL AS stdev_value,
+ NULL AS percentile_25,
+ NULL AS percentile_50,
+ NULL AS percentile_75,
+-- TG-ENDIF
+-- TG-IF is_N_decimal
+ SUM(ROUND(ABS(MOD("{COL_NAME}", 1)), 5)) AS fractional_sum,
+-- TG-ELSE
+ NULL AS fractional_sum,
+-- TG-ENDIF
+-- TG-IF is_type_D
+ CASE
+ WHEN MIN("{COL_NAME}") IS NULL THEN NULL
+ ELSE GREATEST(MIN("{COL_NAME}"), '0001-01-01')
+ END AS min_date,
+ MAX("{COL_NAME}") AS max_date,
+ COUNT(CASE WHEN DATEDIFF('MON', "{COL_NAME}"::DATE, '{RUN_DATE}') > 12 THEN 1 END) AS before_1yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('MON', "{COL_NAME}"::DATE, '{RUN_DATE}') > 60 THEN 1 END) AS before_5yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('MON', "{COL_NAME}"::DATE, '{RUN_DATE}') > 240 THEN 1 END) AS before_20yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('MON', "{COL_NAME}"::DATE, '{RUN_DATE}') > 1200 THEN 1 END) AS before_100yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('DAY', "{COL_NAME}"::DATE, '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1 END) AS within_1yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('DAY', "{COL_NAME}"::DATE, '{RUN_DATE}') BETWEEN 0 AND 30 THEN 1 END) AS within_1mo_date_ct,
+ COUNT(CASE WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 END) AS future_date_ct,
+ COUNT(CASE WHEN DATEDIFF('MON', '{RUN_DATE}', "{COL_NAME}"::DATE) > 240 THEN 1 END) AS distant_future_date_ct,
+ COUNT(DISTINCT DATEDIFF(day, "{COL_NAME}"::DATE, '{RUN_DATE}' ) ) AS date_days_present,
+ COUNT(DISTINCT <%DATEDIFF_WEEK;"{COL_NAME}";'{RUN_DATE}'%>) AS date_weeks_present,
+ COUNT(DISTINCT DATEDIFF(month, "{COL_NAME}"::DATE, '{RUN_DATE}' ) ) AS date_months_present,
+-- TG-ELSE
+ NULL AS min_date,
+ NULL AS max_date,
+ NULL AS before_1yr_date_ct,
+ NULL AS before_5yr_date_ct,
+ NULL AS before_20yr_date_ct,
+ NULL AS before_100yr_date_ct,
+ NULL AS within_1yr_date_ct,
+ NULL AS within_1mo_date_ct,
+ NULL AS future_date_ct,
+ NULL AS distant_future_date_ct,
+ NULL AS date_days_present,
+ NULL AS date_weeks_present,
+ NULL AS date_months_present,
+-- TG-ENDIF
+-- TG-IF is_type_B
+ SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
+-- TG-ELSE
+ NULL AS boolean_true_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT COUNT(DISTINCT REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE(
+ "{COL_NAME}", '[a-z]', 'a'),
+ '[A-Z]', 'A'),
+ '[0-9]', 'N')
+ ) AS pattern_ct
+ FROM target_table
+ WHERE "{COL_NAME}" > ' ' ) AS distinct_pattern_ct,
+ SUM(SIGN(REGEXP_COUNT(TRIM("{COL_NAME}"), ' '))::BIGINT) AS embedded_space_ct,
+ AVG(REGEXP_COUNT(TRIM("{COL_NAME}"), ' ')::FLOAT) AS avg_embedded_spaces,
+-- TG-ELSE
+ NULL AS distinct_pattern_ct,
+ NULL AS embedded_space_ct,
+ NULL AS avg_embedded_spaces,
+-- TG-ENDIF
+ '{PROFILE_RUN_ID}' AS profile_run_id
+ FROM target_table
+-- TG-IF is_type_N
+ , (SELECT
+ PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
+ PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
+ PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" LIMIT 1) pctile
+-- TG-ENDIF
diff --git a/testgen/template/flavors/redshift/profiling/project_profiling_query.yaml b/testgen/template/flavors/redshift/profiling/project_profiling_query.yaml
deleted file mode 100644
index 1055ecd1..00000000
--- a/testgen/template/flavors/redshift/profiling/project_profiling_query.yaml
+++ /dev/null
@@ -1,204 +0,0 @@
----
-01_sampling: |
- WITH target_table AS (
- SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}" WHERE RAND() <= 1.0 / {PROFILE_SAMPLE_RATIO}
- )
- SELECT
-01_else: |
- WITH target_table AS (
- SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
- )
- SELECT
-01_all: |
- {CONNECTION_ID} as connection_id,
- '{PROJECT_CODE}' as project_code,
- '{TABLE_GROUPS_ID}' as table_groups_id,
- '{DATA_SCHEMA}' AS schema_name,
- '{RUN_DATE}' AS run_date,
- '{DATA_TABLE}' AS table_name,
- {COL_POS} AS position,
- '{COL_NAME_SANITIZED}' AS column_name,
- '{COL_TYPE}' AS column_type,
- '{DB_DATA_TYPE}' AS db_data_type,
- '{COL_GEN_TYPE}' AS general_type,
- COUNT(*) AS record_ct,
-
-02_X: |
- COUNT("{COL_NAME}") AS value_ct,
- COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
- SUM(NVL2("{COL_NAME}", 0, 1)) AS null_value_ct,
-02_else: |
- COUNT("{COL_NAME}") AS value_ct,
- COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
- SUM(NVL2("{COL_NAME}", 0, 1)) AS null_value_ct,
-
-03_ADN: MIN(LEN("{COL_NAME}")) AS min_length,
- MAX(LEN("{COL_NAME}")) AS max_length,
- AVG(NULLIF(LEN("{COL_NAME}"), 0)::FLOAT) AS avg_length,
-03_else: NULL as min_length,
- NULL as max_length,
- NULL as avg_length,
-
-04_A: COUNT( CASE WHEN TRIM("{COL_NAME}") ~ '^0(\.0*)?$' THEN 1 END) AS zero_value_ct,
-04_N: SUM( 1 - ABS(SIGN("{COL_NAME}")) )::BIGINT AS zero_value_ct,
-04_else: NULL as zero_value_ct,
-
-05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) as distinct_std_value_ct,
- COUNT( CASE WHEN "{COL_NAME}" = '' THEN 1 END) AS zero_length_ct,
- COUNT( CASE WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1 END ) AS lead_space_ct,
- COUNT( CASE WHEN "{COL_NAME}" ILIKE '"%"' OR "{COL_NAME}" ILIKE '''%''' THEN 1 END ) as quoted_value_ct,
- COUNT( CASE WHEN "{COL_NAME}" ~ '[0-9]' THEN 1 END ) as includes_digit_ct,
- COUNT( CASE
- WHEN LENGTH("{COL_NAME}") > 0 AND "{COL_NAME}" IN ('.', '?', ' ') THEN 1
- WHEN LOWER("{COL_NAME}") SIMILAR TO '(^.{2,}|-{2,}|0{2,}|9{2,}|x{2,}|z{2,}$)' THEN 1
- WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
- 'n/a','#na','none','null','unknown') THEN 1
- WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
- '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
- WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
- '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
- END ) AS filled_value_ct,
- LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text,
- LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text,
- COUNT( CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" <> LOWER("{COL_NAME}") THEN 1 END) AS upper_case_ct,
- COUNT( CASE WHEN "{COL_NAME}" = LOWER("{COL_NAME}") AND "{COL_NAME}" <> UPPER("{COL_NAME}") THEN 1 END) AS lower_case_ct,
- COUNT( CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" = LOWER("{COL_NAME}") THEN 1 END) AS non_alpha_ct,
- COUNT( CASE WHEN TRANSLATE("{COL_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), 'XXXXXXXXXX') <> "{COL_NAME}" THEN 1 END) as non_printing_ct,
- SUM(<%IS_NUM;LEFT("{COL_NAME}", 31)%>) AS numeric_ct,
- SUM(<%IS_DATE;LEFT("{COL_NAME}", 26)%>) AS date_ct,
- CASE
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$'
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'STREET_ADDR'
- WHEN SUM( CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'STATE_USA'
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^(\\+1|1)?[ .-]?(\\([2-9][0-9]{2}\\)|[2-9][0-9]{2})[ .-]?[2-9][0-9]{2}[ .-]?[0-9]{4}$'
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'PHONE_USA'
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$'
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'EMAIL'
- WHEN SUM( CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'ZIP_USA'
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[\\w\\s\-]+\\.(txt|(c|t|p)sv|dat|doc|docx|json|pdf|xlsx|xml)$'
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'FILE_NAME'
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^([0-9]{4}[- ]){3}[0-9]{4}$'
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'CREDIT_CARD'
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'
- AND "{COL_NAME}" !~ '\\s(and|but|or|yet)\\s'
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'DELIMITED_DATA'
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$'
- AND LEFT("{COL_NAME}", 3) NOT BETWEEN '734' AND '749'
- AND LEFT("{COL_NAME}", 3) <> '666' THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'SSN'
- END as std_pattern_match,
-05_else: NULL as distinct_std_value_ct,
- NULL as zero_length_ct,
- NULL as lead_space_ct,
- NULL as quoted_value_ct,
- NULL as includes_digit_ct,
- NULL as filled_value_ct,
- NULL as min_text,
- NULL as max_text,
- NULL as upper_case_ct,
- NULL as lower_case_ct,
- NULL as non_alpha_ct,
- NULL as non_printing_ct,
- NULL as numeric_ct,
- NULL as date_ct,
- NULL as std_pattern_match,
-
-06_A: (SELECT LEFT(LISTAGG(pattern, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1000) AS concat_pats
- FROM ( SELECT TOP 5 CAST(COUNT(*) AS VARCHAR(40)) || ' | ' || pattern AS pattern,
- COUNT(*) AS ct
- FROM ( SELECT REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE(
- "{COL_NAME}", '[a-z]', 'a'),
- '[A-Z]', 'A'),
- '[0-9]', 'N') AS pattern
- FROM target_table
- WHERE "{COL_NAME}" > ' ' AND (SELECT MAX(LEN("{COL_NAME}"))
- FROM target_table) BETWEEN 3 and {MAX_PATTERN_LENGTH}) p
- GROUP BY pattern
- HAVING pattern > ' '
- ORDER BY COUNT(*) DESC) as ps) AS top_patterns,
-06_else: NULL as top_patterns,
-
-08_N: MIN("{COL_NAME}") AS min_value,
- MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
- MAX("{COL_NAME}") AS max_value,
- AVG(CAST("{COL_NAME}" AS FLOAT)) AS avg_value,
- STDDEV(CAST("{COL_NAME}" AS FLOAT)) AS stdev_value,
- MIN(pct_25) as percentile_25,
- MIN(pct_50) as percentile_50,
- MIN(pct_75) as percentile_75,
-08_else: NULL as min_value,
- NULL as min_value_over_0,
- NULL as max_value,
- NULL as avg_value,
- NULL as stdev_value,
- NULL as percentile_25,
- NULL as percentile_50,
- NULL as percentile_75,
-
-10_N_dec: SUM(ROUND(ABS(MOD("{COL_NAME}", 1)), 5)) as fractional_sum,
-10_else: NULL as fractional_sum,
-
-11_D: CASE
- WHEN MIN("{COL_NAME}") IS NULL THEN NULL
- ELSE GREATEST(MIN("{COL_NAME}"), '0001-01-01')
- END as min_date,
- MAX("{COL_NAME}") as max_date,
- COUNT( CASE WHEN DATEDIFF('MON', "{COL_NAME}"::DATE, '{RUN_DATE}') > 12 THEN 1 END) AS before_1yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('MON', "{COL_NAME}"::DATE, '{RUN_DATE}') > 60 THEN 1 END) AS before_5yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('MON', "{COL_NAME}"::DATE, '{RUN_DATE}') > 240 THEN 1 END) AS before_20yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('MON', "{COL_NAME}"::DATE, '{RUN_DATE}') > 1200 THEN 1 END) AS before_100yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('DAY', "{COL_NAME}"::DATE, '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1 END) AS within_1yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('DAY', "{COL_NAME}"::DATE, '{RUN_DATE}') BETWEEN 0 AND 30 THEN 1 END) AS within_1mo_date_ct,
- COUNT( CASE WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 END) AS future_date_ct,
- COUNT( CASE WHEN DATEDIFF('MON', '{RUN_DATE}', "{COL_NAME}"::DATE) > 240 THEN 1 END) AS distant_future_date_ct,
- COUNT(DISTINCT DATEDIFF(day, "{COL_NAME}"::DATE, '{RUN_DATE}' ) ) as date_days_present,
- COUNT(DISTINCT DATEDIFF(week, "{COL_NAME}"::DATE, '{RUN_DATE}' ) ) as date_weeks_present,
- COUNT(DISTINCT DATEDIFF(month, "{COL_NAME}"::DATE, '{RUN_DATE}' ) ) as date_months_present,
-11_else: NULL as min_date,
- NULL as max_date,
- NULL as before_1yr_date_ct,
- NULL as before_5yr_date_ct,
- NULL as before_20yr_date_ct,
- NULL AS before_100yr_date_ct,
- NULL as within_1yr_date_ct,
- NULL as within_1mo_date_ct,
- NULL as future_date_ct,
- NULL as distant_future_date_ct,
- NULL as date_days_present,
- NULL as date_weeks_present,
- NULL as date_months_present,
-
-12_B: SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
-12_else: NULL as boolean_true_ct,
-
-14_A: ( SELECT COUNT(DISTINCT REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE(
- "{COL_NAME}", '[a-z]', 'a'),
- '[A-Z]', 'A'),
- '[0-9]', 'N')
- ) AS pattern_ct
- FROM target_table
- WHERE "{COL_NAME}" > ' ' ) AS distinct_pattern_ct,
- SUM(SIGN(REGEXP_COUNT(TRIM("{COL_NAME}"), ' '))::BIGINT) AS embedded_space_ct,
- AVG(REGEXP_COUNT(TRIM("{COL_NAME}"), ' ')::FLOAT) AS avg_embedded_spaces,
-14_else: NULL as distinct_pattern_ct,
- NULL as embedded_space_ct,
- NULL as avg_embedded_spaces,
-
-16_all: " '{PROFILE_RUN_ID}' as profile_run_id"
-
-98_all: ' FROM target_table'
-
-99_N: |
- , (SELECT
- PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
- PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
- PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
- FROM "{DATA_SCHEMA}"."{DATA_TABLE}" LIMIT 1) pctile
-99_N_sampling: |
- , (SELECT
- PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
- PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
- PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
- FROM "{DATA_SCHEMA}"."{DATA_TABLE}" LIMIT 1) pctile
-99_else: ' '
diff --git a/testgen/template/flavors/redshift/profiling/templated_functions.yaml b/testgen/template/flavors/redshift/profiling/templated_functions.yaml
index 4953e254..dffaa4f8 100644
--- a/testgen/template/flavors/redshift/profiling/templated_functions.yaml
+++ b/testgen/template/flavors/redshift/profiling/templated_functions.yaml
@@ -99,3 +99,4 @@ IS_DATE: CASE
ELSE 0
END
+DATEDIFF_WEEK: (DATE_TRUNC('week', {$2}::DATE)::DATE - DATE_TRUNC('week', {$1}::DATE)::DATE) / 7
diff --git a/testgen/template/flavors/redshift_spectrum/profiling/project_profiling_query.sql b/testgen/template/flavors/redshift_spectrum/profiling/project_profiling_query.sql
new file mode 100644
index 00000000..53774bb0
--- /dev/null
+++ b/testgen/template/flavors/redshift_spectrum/profiling/project_profiling_query.sql
@@ -0,0 +1,206 @@
+WITH target_table AS (
+-- TG-IF do_sample
+ SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}" WHERE RAND() <= 1.0 / {PROFILE_SAMPLE_RATIO}
+-- TG-ELSE
+ SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
+-- TG-ENDIF
+)
+SELECT
+ {CONNECTION_ID} AS connection_id,
+ '{PROJECT_CODE}' AS project_code,
+ '{TABLE_GROUPS_ID}' AS table_groups_id,
+ '{DATA_SCHEMA}' AS schema_name,
+ '{RUN_DATE}' AS run_date,
+ '{DATA_TABLE}' AS table_name,
+ {COL_POS} AS position,
+ '{COL_NAME_SANITIZED}' AS column_name,
+ '{COL_TYPE}' AS column_type,
+ '{DB_DATA_TYPE}' AS db_data_type,
+ '{COL_GEN_TYPE}' AS general_type,
+ COUNT(*) AS record_ct,
+ COUNT("{COL_NAME}") AS value_ct,
+ COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
+ SUM(NVL2("{COL_NAME}", 0, 1)) AS null_value_ct,
+-- TG-IF is_type_ADN
+ MIN(LEN("{COL_NAME}")) AS min_length,
+ MAX(LEN("{COL_NAME}")) AS max_length,
+ AVG(NULLIF(LEN("{COL_NAME}"), 0)::FLOAT) AS avg_length,
+-- TG-ELSE
+ NULL AS min_length,
+ NULL AS max_length,
+ NULL AS avg_length,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ COUNT(CASE WHEN TRIM("{COL_NAME}") ~ '^0(\.0*)?$' THEN 1 END) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ SUM(1 - ABS(SIGN("{COL_NAME}")))::BIGINT AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_not_A_not_N
+ NULL AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) AS distinct_std_value_ct,
+ COUNT(CASE WHEN "{COL_NAME}" = '' THEN 1 END) AS zero_length_ct,
+ COUNT(CASE WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1 END) AS lead_space_ct,
+ COUNT(CASE WHEN "{COL_NAME}" ILIKE '"%"' OR "{COL_NAME}" ILIKE '''%''' THEN 1 END) AS quoted_value_ct,
+ COUNT(CASE WHEN "{COL_NAME}" ~ '[0-9]' THEN 1 END) AS includes_digit_ct,
+ COUNT(CASE
+ WHEN LOWER("{COL_NAME}") SIMILAR TO '(.{1,}|-{1,}|\\?{1,}|\\s{1,}|0{2,}|9{2,}|x{2,}|z{2,})' THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
+ 'n/a','#na','none','null','unknown') THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
+ '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
+ '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
+ END) AS filled_value_ct,
+ LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text,
+ LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text,
+ COUNT(CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" <> LOWER("{COL_NAME}") THEN 1 END) AS upper_case_ct,
+ COUNT(CASE WHEN "{COL_NAME}" = LOWER("{COL_NAME}") AND "{COL_NAME}" <> UPPER("{COL_NAME}") THEN 1 END) AS lower_case_ct,
+ COUNT(CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" = LOWER("{COL_NAME}") THEN 1 END) AS non_alpha_ct,
+ COUNT(CASE WHEN TRANSLATE("{COL_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), 'XXXXXXXXXX') <> "{COL_NAME}" THEN 1 END) AS non_printing_ct,
+ SUM(<%IS_NUM;LEFT("{COL_NAME}", 31)%>) AS numeric_ct,
+ SUM(<%IS_DATE;LEFT("{COL_NAME}", 26)%>) AS date_ct,
+ CASE
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$'
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'STREET_ADDR'
+ WHEN SUM(CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'STATE_USA'
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^(\\+1|1)?[ .-]?(\\([2-9][0-9]{2}\\)|[2-9][0-9]{2})[ .-]?[2-9][0-9]{2}[ .-]?[0-9]{4}$'
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'PHONE_USA'
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$'
+ AND "{COL_NAME}" NOT LIKE '%://%'
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'EMAIL'
+ WHEN SUM(CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'ZIP_USA'
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^[\\w\\s\-]+\\.(txt|(c|t|p)sv|dat|doc|docx|json|pdf|xlsx|xml)$'
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'FILE_NAME'
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^([0-9]{4}[- ]?){3}[0-9]{4}$'
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'CREDIT_CARD'
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'
+ AND "{COL_NAME}" !~ '\\s(and|but|or|yet)\\s'
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'DELIMITED_DATA'
+ WHEN SUM(CASE WHEN "{COL_NAME}" ~ '^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$'
+ AND LEFT("{COL_NAME}", 3) NOT BETWEEN '734' AND '749'
+ AND LEFT("{COL_NAME}", 3) <> '666' THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'SSN'
+ END AS std_pattern_match,
+-- TG-ELSE
+ NULL AS distinct_std_value_ct,
+ NULL AS zero_length_ct,
+ NULL AS lead_space_ct,
+ NULL AS quoted_value_ct,
+ NULL AS includes_digit_ct,
+ NULL AS filled_value_ct,
+ NULL AS min_text,
+ NULL AS max_text,
+ NULL AS upper_case_ct,
+ NULL AS lower_case_ct,
+ NULL AS non_alpha_ct,
+ NULL AS non_printing_ct,
+ NULL AS numeric_ct,
+ NULL AS date_ct,
+ NULL AS std_pattern_match,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT LEFT(LISTAGG(pattern, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1000) AS concat_pats
+ FROM (SELECT TOP 5 CAST(COUNT(*) AS VARCHAR(40)) || ' | ' || pattern AS pattern,
+ COUNT(*) AS ct
+ FROM ( SELECT REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE(
+ "{COL_NAME}", '[a-z]', 'a'),
+ '[A-Z]', 'A'),
+ '[0-9]', 'N') AS pattern
+ FROM target_table
+ WHERE "{COL_NAME}" > ' ' AND (SELECT MAX(LEN("{COL_NAME}"))
+ FROM target_table) BETWEEN 3 and {MAX_PATTERN_LENGTH}) p
+ GROUP BY pattern
+ HAVING pattern > ' '
+ ORDER BY COUNT(*) DESC) AS ps) AS top_patterns,
+-- TG-ELSE
+ NULL AS top_patterns,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ MIN("{COL_NAME}") AS min_value,
+ MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
+ MAX("{COL_NAME}") AS max_value,
+ AVG(CAST("{COL_NAME}" AS FLOAT)) AS avg_value,
+ STDDEV(CAST("{COL_NAME}" AS FLOAT)) AS stdev_value,
+ MIN(pct_25) AS percentile_25,
+ MIN(pct_50) AS percentile_50,
+ MIN(pct_75) AS percentile_75,
+-- TG-ELSE
+ NULL AS min_value,
+ NULL AS min_value_over_0,
+ NULL AS max_value,
+ NULL AS avg_value,
+ NULL AS stdev_value,
+ NULL AS percentile_25,
+ NULL AS percentile_50,
+ NULL AS percentile_75,
+-- TG-ENDIF
+-- TG-IF is_N_decimal
+ SUM(ROUND(ABS(MOD("{COL_NAME}", 1)), 5)) AS fractional_sum,
+-- TG-ELSE
+ NULL AS fractional_sum,
+-- TG-ENDIF
+-- TG-IF is_type_D
+ CASE
+ WHEN MIN("{COL_NAME}") IS NULL THEN NULL
+ ELSE GREATEST(MIN("{COL_NAME}"), '0001-01-01')
+ END AS min_date,
+ MAX("{COL_NAME}") AS max_date,
+ COUNT(CASE WHEN DATEDIFF('MON', "{COL_NAME}"::DATE, '{RUN_DATE}') > 12 THEN 1 END) AS before_1yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('MON', "{COL_NAME}"::DATE, '{RUN_DATE}') > 60 THEN 1 END) AS before_5yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('MON', "{COL_NAME}"::DATE, '{RUN_DATE}') > 240 THEN 1 END) AS before_20yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('MON', "{COL_NAME}"::DATE, '{RUN_DATE}') > 1200 THEN 1 END) AS before_100yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('DAY', "{COL_NAME}"::DATE, '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1 END) AS within_1yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('DAY', "{COL_NAME}"::DATE, '{RUN_DATE}') BETWEEN 0 AND 30 THEN 1 END) AS within_1mo_date_ct,
+ COUNT(CASE WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 END) AS future_date_ct,
+ COUNT(CASE WHEN DATEDIFF('MON', '{RUN_DATE}', "{COL_NAME}"::DATE) > 240 THEN 1 END) AS distant_future_date_ct,
+ COUNT(DISTINCT DATEDIFF(day, "{COL_NAME}"::DATE, '{RUN_DATE}' ) ) AS date_days_present,
+ COUNT(DISTINCT <%DATEDIFF_WEEK;"{COL_NAME}";'{RUN_DATE}'%>) AS date_weeks_present,
+ COUNT(DISTINCT DATEDIFF(month, "{COL_NAME}"::DATE, '{RUN_DATE}' ) ) AS date_months_present,
+-- TG-ELSE
+ NULL AS min_date,
+ NULL AS max_date,
+ NULL AS before_1yr_date_ct,
+ NULL AS before_5yr_date_ct,
+ NULL AS before_20yr_date_ct,
+ NULL AS before_100yr_date_ct,
+ NULL AS within_1yr_date_ct,
+ NULL AS within_1mo_date_ct,
+ NULL AS future_date_ct,
+ NULL AS distant_future_date_ct,
+ NULL AS date_days_present,
+ NULL AS date_weeks_present,
+ NULL AS date_months_present,
+-- TG-ENDIF
+-- TG-IF is_type_B
+ SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
+-- TG-ELSE
+ NULL AS boolean_true_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT COUNT(DISTINCT REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE(
+ "{COL_NAME}", '[a-z]', 'a'),
+ '[A-Z]', 'A'),
+ '[0-9]', 'N')
+ ) AS pattern_ct
+ FROM target_table
+ WHERE "{COL_NAME}" > ' ' ) AS distinct_pattern_ct,
+ SUM(SIGN(REGEXP_COUNT(TRIM("{COL_NAME}"), ' '))::BIGINT) AS embedded_space_ct,
+ AVG(REGEXP_COUNT(TRIM("{COL_NAME}"), ' ')::FLOAT) AS avg_embedded_spaces,
+-- TG-ELSE
+ NULL AS distinct_pattern_ct,
+ NULL AS embedded_space_ct,
+ NULL AS avg_embedded_spaces,
+-- TG-ENDIF
+ '{PROFILE_RUN_ID}' AS profile_run_id
+ FROM target_table
+-- TG-IF is_type_N
+ , (SELECT
+ PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
+ PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
+ PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" LIMIT 1) pctile
+-- TG-ENDIF
diff --git a/testgen/template/flavors/redshift_spectrum/profiling/project_profiling_query.yaml b/testgen/template/flavors/redshift_spectrum/profiling/project_profiling_query.yaml
deleted file mode 100644
index 0e0b6401..00000000
--- a/testgen/template/flavors/redshift_spectrum/profiling/project_profiling_query.yaml
+++ /dev/null
@@ -1,204 +0,0 @@
----
-01_sampling: |
- WITH target_table AS (
- SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}" WHERE RAND() <= 1.0 / {PROFILE_SAMPLE_RATIO}
- )
- SELECT
-01_else: |
- WITH target_table AS (
- SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
- )
- SELECT
-01_all: |
- {CONNECTION_ID} as connection_id,
- '{PROJECT_CODE}' as project_code,
- '{TABLE_GROUPS_ID}' as table_groups_id,
- '{DATA_SCHEMA}' AS schema_name,
- '{RUN_DATE}' AS run_date,
- '{DATA_TABLE}' AS table_name,
- {COL_POS} AS position,
- '{COL_NAME_SANITIZED}' AS column_name,
- '{COL_TYPE}' AS column_type,
- '{DB_DATA_TYPE}' AS db_data_type,
- '{COL_GEN_TYPE}' AS general_type,
- COUNT(*) AS record_ct,
-
-02_X: |
- COUNT("{COL_NAME}") AS value_ct,
- COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
- SUM(NVL2("{COL_NAME}", 0, 1)) AS null_value_ct,
-02_else: |
- COUNT("{COL_NAME}") AS value_ct,
- COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
- SUM(NVL2("{COL_NAME}", 0, 1)) AS null_value_ct,
-
-03_ADN: MIN(LEN("{COL_NAME}")) AS min_length,
- MAX(LEN("{COL_NAME}")) AS max_length,
- AVG(NULLIF(LEN("{COL_NAME}"), 0)::FLOAT) AS avg_length,
-03_else: NULL as min_length,
- NULL as max_length,
- NULL as avg_length,
-
-04_A: COUNT( CASE WHEN TRIM("{COL_NAME}") ~ '^0(\.0*)?$' THEN 1 END) AS zero_value_ct,
-04_N: SUM( 1 - ABS(SIGN("{COL_NAME}")) )::BIGINT AS zero_value_ct,
-04_else: NULL as zero_value_ct,
-
-05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) as distinct_std_value_ct,
- COUNT( CASE WHEN "{COL_NAME}" = '' THEN 1 END) AS zero_length_ct,
- COUNT( CASE WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1 END ) AS lead_space_ct,
- COUNT( CASE WHEN "{COL_NAME}" ILIKE '"%"' OR "{COL_NAME}" ILIKE '''%''' THEN 1 END ) as quoted_value_ct,
- COUNT( CASE WHEN "{COL_NAME}" ~ '[0-9]' THEN 1 END ) as includes_digit_ct,
- COUNT( CASE
- WHEN LENGTH("{COL_NAME}") > 0 AND "{COL_NAME}" IN ('.', '?', ' ') THEN 1
- WHEN LOWER("{COL_NAME}") SIMILAR TO '(^.{2,}|-{2,}|0{2,}|9{2,}|x{2,}|z{2,}$)' THEN 1
- WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
- 'n/a','#na','none','null','unknown') THEN 1
- WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
- '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
- WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
- '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
- END ) AS filled_value_ct,
- LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text,
- LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text,
- COUNT( CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" <> LOWER("{COL_NAME}") THEN 1 END) AS upper_case_ct,
- COUNT( CASE WHEN "{COL_NAME}" = LOWER("{COL_NAME}") AND "{COL_NAME}" <> UPPER("{COL_NAME}") THEN 1 END) AS lower_case_ct,
- COUNT( CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" = LOWER("{COL_NAME}") THEN 1 END) AS non_alpha_ct,
- COUNT( CASE WHEN TRANSLATE("{COL_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), 'XXXXXXXXXX') <> "{COL_NAME}" THEN 1 END) as non_printing_ct,
- SUM(<%IS_NUM;LEFT("{COL_NAME}", 31)%>) AS numeric_ct,
- SUM(<%IS_DATE;LEFT("{COL_NAME}", 26)%>) AS date_ct,
- CASE
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$'
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'STREET_ADDR'
- WHEN SUM( CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'STATE_USA'
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^(\\+1|1)?[ .-]?(\\([2-9][0-9]{2}\\)|[2-9][0-9]{2})[ .-]?[2-9][0-9]{2}[ .-]?[0-9]{4}$'
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'PHONE_USA'
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$'
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'EMAIL'
- WHEN SUM( CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'ZIP_USA'
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[\\w\\s\-]+\\.(txt|(c|t|p)sv|dat|doc|docx|json|pdf|xlsx|xml)$'
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'FILE_NAME'
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^([0-9]{4}[- ]){3}[0-9]{4}$'
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'CREDIT_CARD'
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'
- AND "{COL_NAME}" !~ '\\s(and|but|or|yet)\\s'
- THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'DELIMITED_DATA'
- WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$'
- AND LEFT("{COL_NAME}", 3) NOT BETWEEN '734' AND '749'
- AND LEFT("{COL_NAME}", 3) <> '666' THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'SSN'
- END as std_pattern_match,
-05_else: NULL as distinct_std_value_ct,
- NULL as zero_length_ct,
- NULL as lead_space_ct,
- NULL as quoted_value_ct,
- NULL as includes_digit_ct,
- NULL as filled_value_ct,
- NULL as min_text,
- NULL as max_text,
- NULL as upper_case_ct,
- NULL as lower_case_ct,
- NULL as non_alpha_ct,
- NULL as non_printing_ct,
- NULL as numeric_ct,
- NULL as date_ct,
- NULL as std_pattern_match,
-
-06_A: (SELECT LEFT(LISTAGG(pattern, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1000) AS concat_pats
- FROM ( SELECT TOP 5 CAST(COUNT(*) AS VARCHAR(40)) || ' | ' || pattern AS pattern,
- COUNT(*) AS ct
- FROM ( SELECT REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE(
- "{COL_NAME}", '[a-z]', 'a'),
- '[A-Z]', 'A'),
- '[0-9]', 'N') AS pattern
- FROM target_table
- WHERE "{COL_NAME}" > ' ' AND (SELECT MAX(LEN("{COL_NAME}"))
- FROM target_table) BETWEEN 3 and {MAX_PATTERN_LENGTH}) p
- GROUP BY pattern
- HAVING pattern > ' '
- ORDER BY COUNT(*) DESC) as ps) AS top_patterns,
-06_else: NULL as top_patterns,
-
-08_N: MIN("{COL_NAME}") AS min_value,
- MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
- MAX("{COL_NAME}") AS max_value,
- AVG(CAST("{COL_NAME}" AS FLOAT)) AS avg_value,
- STDDEV(CAST("{COL_NAME}" AS FLOAT)) AS stdev_value,
- MIN(pct_25) as percentile_25,
- MIN(pct_50) as percentile_50,
- MIN(pct_75) as percentile_75,
-08_else: NULL as min_value,
- NULL as min_value_over_0,
- NULL as max_value,
- NULL as avg_value,
- NULL as stdev_value,
- NULL as percentile_25,
- NULL as percentile_50,
- NULL as percentile_75,
-
-10_N_dec: SUM(ROUND(ABS(MOD("{COL_NAME}", 1)), 5)) as fractional_sum,
-10_else: NULL as fractional_sum,
-
-11_D: CASE
- WHEN MIN("{COL_NAME}") IS NULL THEN NULL
- ELSE GREATEST(MIN("{COL_NAME}"), '0001-01-01')
- END as min_date,
- MAX("{COL_NAME}") as max_date,
- COUNT( CASE WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 12 THEN 1 END) AS before_1yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 60 THEN 1 END) AS before_5yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 240 THEN 1 END) AS before_20yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 1200 THEN 1 END) AS before_100yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1 END) AS within_1yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 30 THEN 1 END) AS within_1mo_date_ct,
- COUNT( CASE WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 END) AS future_date_ct,
- COUNT( CASE WHEN DATEDIFF('MON', '{RUN_DATE}', "{COL_NAME}") > 240 THEN 1 END) AS distant_future_date_ct,
- COUNT(DISTINCT DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}' ) ) as date_days_present,
- COUNT(DISTINCT DATEDIFF(week, "{COL_NAME}", '{RUN_DATE}' ) ) as date_weeks_present,
- COUNT(DISTINCT DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}' ) ) as date_months_present,
-11_else: NULL as min_date,
- NULL as max_date,
- NULL as before_1yr_date_ct,
- NULL as before_5yr_date_ct,
- NULL as before_20yr_date_ct,
- NULL AS before_100yr_date_ct,
- NULL as within_1yr_date_ct,
- NULL as within_1mo_date_ct,
- NULL as future_date_ct,
- NULL as distant_future_date_ct,
- NULL as date_days_present,
- NULL as date_weeks_present,
- NULL as date_months_present,
-
-12_B: SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
-12_else: NULL as boolean_true_ct,
-
-14_A: ( SELECT COUNT(DISTINCT REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE(
- "{COL_NAME}", '[a-z]', 'a'),
- '[A-Z]', 'A'),
- '[0-9]', 'N')
- ) AS pattern_ct
- FROM target_table
- WHERE "{COL_NAME}" > ' ' ) AS distinct_pattern_ct,
- SUM(SIGN(REGEXP_COUNT(TRIM("{COL_NAME}"), ' '))::BIGINT) AS embedded_space_ct,
- AVG(REGEXP_COUNT(TRIM("{COL_NAME}"), ' ')::FLOAT) AS avg_embedded_spaces,
-14_else: NULL as distinct_pattern_ct,
- NULL as embedded_space_ct,
- NULL as avg_embedded_spaces,
-
-16_all: " '{PROFILE_RUN_ID}' as profile_run_id"
-
-98_all: ' FROM target_table'
-
-99_N: |
- , (SELECT
- PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
- PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
- PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
- FROM "{DATA_SCHEMA}"."{DATA_TABLE}" LIMIT 1) pctile
-99_N_sampling: |
- , (SELECT
- PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
- PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
- PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
- FROM "{DATA_SCHEMA}"."{DATA_TABLE}" LIMIT 1) pctile
-99_else: ' '
diff --git a/testgen/template/flavors/redshift_spectrum/profiling/templated_functions.yaml b/testgen/template/flavors/redshift_spectrum/profiling/templated_functions.yaml
index 4953e254..dffaa4f8 100644
--- a/testgen/template/flavors/redshift_spectrum/profiling/templated_functions.yaml
+++ b/testgen/template/flavors/redshift_spectrum/profiling/templated_functions.yaml
@@ -99,3 +99,4 @@ IS_DATE: CASE
ELSE 0
END
+DATEDIFF_WEEK: (DATE_TRUNC('week', {$2}::DATE)::DATE - DATE_TRUNC('week', {$1}::DATE)::DATE) / 7
diff --git a/testgen/template/flavors/sap_hana/data_chars/get_schema_ddf.sql b/testgen/template/flavors/sap_hana/data_chars/get_schema_ddf.sql
new file mode 100644
index 00000000..8a0838fd
--- /dev/null
+++ b/testgen/template/flavors/sap_hana/data_chars/get_schema_ddf.sql
@@ -0,0 +1,41 @@
+SELECT
+ c.SCHEMA_NAME AS schema_name,
+ c.TABLE_NAME AS table_name,
+ c.COLUMN_NAME AS column_name,
+ CASE
+ WHEN c.DATA_TYPE_NAME IN ('NVARCHAR', 'VARCHAR', 'NCHAR', 'CHAR', 'ALPHANUM', 'SHORTTEXT') THEN 'char(' || c.LENGTH || ')'
+ WHEN c.DATA_TYPE_NAME = 'DECIMAL' AND c.SCALE = 0 THEN 'bigint'
+ WHEN c.DATA_TYPE_NAME = 'DECIMAL' AND c.SCALE > 0 THEN 'numeric(' || c.LENGTH || ',' || c.SCALE || ')'
+ WHEN c.DATA_TYPE_NAME IN ('DOUBLE', 'REAL', 'SMALLDECIMAL') THEN 'numeric'
+ WHEN c.DATA_TYPE_NAME IN ('TIMESTAMP', 'SECONDDATE') THEN 'timestamp'
+ ELSE LOWER(c.DATA_TYPE_NAME)
+ END AS column_type,
+ CASE
+ WHEN c.DATA_TYPE_NAME IN ('NVARCHAR', 'VARCHAR', 'NCHAR', 'CHAR', 'ALPHANUM', 'SHORTTEXT') THEN c.DATA_TYPE_NAME || '(' || c.LENGTH || ')'
+ WHEN c.DATA_TYPE_NAME = 'DECIMAL' THEN 'DECIMAL(' || c.LENGTH || ',' || c.SCALE || ')'
+ ELSE c.DATA_TYPE_NAME
+ END AS db_data_type,
+ c.POSITION AS ordinal_position,
+ CASE
+ WHEN c.DATA_TYPE_NAME IN ('NVARCHAR', 'VARCHAR', 'NCHAR', 'CHAR', 'ALPHANUM', 'SHORTTEXT')
+ THEN 'A'
+ WHEN c.DATA_TYPE_NAME = 'BOOLEAN'
+ THEN 'B'
+ WHEN c.DATA_TYPE_NAME IN ('DATE', 'TIMESTAMP', 'SECONDDATE')
+ THEN 'D'
+ WHEN c.DATA_TYPE_NAME = 'TIME'
+ THEN 'T'
+ WHEN c.DATA_TYPE_NAME IN ('INTEGER', 'BIGINT', 'SMALLINT', 'TINYINT', 'DECIMAL', 'DOUBLE', 'REAL', 'SMALLDECIMAL')
+ THEN 'N'
+ ELSE 'X'
+ END AS general_type,
+ CASE
+ WHEN c.DATA_TYPE_NAME = 'DECIMAL' AND c.SCALE > 0 THEN 1
+ WHEN c.DATA_TYPE_NAME IN ('DOUBLE', 'REAL', 'SMALLDECIMAL') THEN 1
+ ELSE 0
+ END AS is_decimal,
+ t.RECORD_COUNT AS approx_record_ct
+FROM SYS.TABLE_COLUMNS c
+LEFT JOIN SYS.M_TABLES t ON c.SCHEMA_NAME = t.SCHEMA_NAME AND c.TABLE_NAME = t.TABLE_NAME
+WHERE c.SCHEMA_NAME = '{DATA_SCHEMA}' {TABLE_CRITERIA}
+ORDER BY c.SCHEMA_NAME, c.TABLE_NAME, c.POSITION
diff --git a/testgen/template/flavors/sap_hana/gen_query_tests/gen_Dupe_Rows.sql b/testgen/template/flavors/sap_hana/gen_query_tests/gen_Dupe_Rows.sql
new file mode 100644
index 00000000..117ffe06
--- /dev/null
+++ b/testgen/template/flavors/sap_hana/gen_query_tests/gen_Dupe_Rows.sql
@@ -0,0 +1,55 @@
+WITH latest_run AS (
+ -- Latest complete profiling run before as-of-date
+ SELECT MAX(run_date) AS last_run_date
+ FROM profile_results
+ WHERE table_groups_id = :TABLE_GROUPS_ID ::UUID
+ AND run_date::DATE <= :AS_OF_DATE ::DATE
+),
+selected_tables AS (
+ SELECT profile_run_id, schema_name, table_name,
+ STRING_AGG(:QUOTE || column_name || :QUOTE, ', ' ORDER BY position) AS groupby_names
+ FROM profile_results p
+ INNER JOIN latest_run lr ON p.run_date = lr.last_run_date
+ WHERE table_groups_id = :TABLE_GROUPS_ID ::UUID
+ -- Skip X types - SAP HANA does not allow grouping by LOB types like BLOB, CLOB, NCLOB, TEXT, BINTEXT
+ AND general_type <> 'X'
+ GROUP BY profile_run_id, schema_name, table_name
+)
+INSERT INTO test_definitions (
+ table_groups_id, test_suite_id, test_type,
+ schema_name, table_name,
+ test_active, last_auto_gen_date, profiling_as_of_date, profile_run_id,
+ groupby_names, skip_errors
+)
+SELECT
+ :TABLE_GROUPS_ID ::UUID AS table_groups_id,
+ :TEST_SUITE_ID ::UUID AS test_suite_id,
+ 'Dupe_Rows' AS test_type,
+ s.schema_name,
+ s.table_name,
+ 'Y' AS test_active,
+ :RUN_DATE ::TIMESTAMP AS last_auto_gen_date,
+ :AS_OF_DATE ::TIMESTAMP AS profiling_as_of_date,
+ s.profile_run_id,
+ s.groupby_names,
+ 0 AS skip_errors
+FROM selected_tables s
+ -- Only insert if test type is active
+WHERE EXISTS (SELECT 1 FROM test_types WHERE test_type = 'Dupe_Rows' AND active = 'Y')
+ -- Only insert if test type is included in generation set
+ AND EXISTS (SELECT 1 FROM generation_sets WHERE test_type = 'Dupe_Rows' AND generation_set = :GENERATION_SET)
+
+-- Match "uix_td_autogen_table" unique index exactly
+ON CONFLICT (test_suite_id, test_type, schema_name, table_name)
+WHERE last_auto_gen_date IS NOT NULL
+ AND table_name IS NOT NULL
+ AND column_name IS NULL
+
+-- Update tests if they already exist
+DO UPDATE SET
+ test_active = EXCLUDED.test_active,
+ last_auto_gen_date = EXCLUDED.last_auto_gen_date,
+ groupby_names = EXCLUDED.groupby_names,
+ skip_errors = EXCLUDED.skip_errors
+-- Ignore locked tests
+WHERE test_definitions.lock_refresh = 'N';
diff --git a/testgen/template/flavors/sap_hana/gen_query_tests/gen_Freshness_Trend.sql b/testgen/template/flavors/sap_hana/gen_query_tests/gen_Freshness_Trend.sql
new file mode 100644
index 00000000..ae947a22
--- /dev/null
+++ b/testgen/template/flavors/sap_hana/gen_query_tests/gen_Freshness_Trend.sql
@@ -0,0 +1,193 @@
+WITH latest_run AS (
+ -- Latest complete profiling run before as-of-date
+ SELECT MAX(run_date) AS last_run_date
+ FROM profile_results
+ WHERE table_groups_id = :TABLE_GROUPS_ID ::UUID
+ AND run_date::DATE <= :AS_OF_DATE ::DATE
+),
+latest_results AS (
+ -- Column results for latest run
+ SELECT p.profile_run_id, p.schema_name, p.table_name, p.column_name,
+ p.functional_data_type, p.general_type,
+ p.distinct_value_ct, p.record_ct, p.null_value_ct,
+ p.max_value, p.min_value, p.avg_value, p.stdev_value
+ FROM profile_results p
+ INNER JOIN latest_run lr ON p.run_date = lr.last_run_date
+ INNER JOIN data_table_chars dtc ON (
+ dtc.table_groups_id = p.table_groups_id
+ AND dtc.schema_name = p.schema_name
+ AND dtc.table_name = p.table_name
+ -- Ignore dropped tables
+ AND dtc.drop_date IS NULL
+ )
+ WHERE p.table_groups_id = :TABLE_GROUPS_ID ::UUID
+),
+-- IDs - TOP 2
+id_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type, distinct_value_ct,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY
+ CASE
+ WHEN functional_data_type ILIKE 'ID-Unique%' THEN 1
+ WHEN functional_data_type = 'ID-Secondary' THEN 2
+ ELSE 3
+ END, distinct_value_ct DESC, column_name
+ ) AS rank
+ FROM latest_results
+ WHERE general_type IN ('A', 'D', 'N')
+ AND functional_data_type ILIKE 'ID%'
+),
+-- Process Date - TOP 1
+process_date_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type, distinct_value_ct,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY
+ CASE
+ WHEN column_name ILIKE '%mod%' THEN 1
+ WHEN column_name ILIKE '%up%' THEN 1
+ WHEN column_name ILIKE '%cr%' THEN 2
+ WHEN column_name ILIKE '%in%' THEN 2
+ END, distinct_value_ct DESC, column_name
+ ) AS rank
+ FROM latest_results
+ WHERE general_type IN ('A', 'D', 'N')
+ AND functional_data_type ILIKE 'process%'
+),
+-- Transaction Date - TOP 1
+tran_date_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type, distinct_value_ct,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY distinct_value_ct DESC, column_name
+ ) AS rank
+ FROM latest_results
+  WHERE general_type IN ('A', 'D', 'N')
+    AND (functional_data_type ILIKE 'transactional date%'
+      OR functional_data_type ILIKE 'period%'
+      OR functional_data_type = 'timestamp')
+),
+-- Numeric Measures
+numeric_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type,
+ -- Weighted score
+ (
+ 0.25 * (distinct_value_ct * 1.0 / NULLIF(record_ct, 0)) +
+ 0.15 * ((max_value - min_value) / NULLIF(ABS(NULLIF(avg_value, 0)), 1)) +
+ 0.10 * (LEAST(1, LOG(GREATEST(distinct_value_ct, 2))) / LOG(GREATEST(record_ct, 2))) +
+ 0.40 * (stdev_value / NULLIF(ABS(NULLIF(avg_value, 0)), 1)) +
+ 0.10 * (1.0 - (null_value_ct * 1.0 / NULLIF(NULLIF(record_ct, 0), 1)))
+ ) AS change_detection_score
+ FROM latest_results
+ WHERE general_type = 'N'
+ AND (
+ functional_data_type ILIKE 'Measure%'
+ OR functional_data_type IN ('Sequence', 'Constant')
+ )
+),
+numeric_cols_ranked AS (
+ SELECT *,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY change_detection_score DESC, column_name
+ ) AS rank
+ FROM numeric_cols
+ WHERE change_detection_score IS NOT NULL
+),
+combined AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'ID' AS element_type, general_type, 10 + rank AS fingerprint_order
+ FROM id_cols
+ WHERE rank <= 2
+ UNION ALL
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'DATE_P' AS element_type, general_type, 20 + rank AS fingerprint_order
+ FROM process_date_cols
+ WHERE rank = 1
+ UNION ALL
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'DATE_T' AS element_type, general_type, 30 + rank AS fingerprint_order
+ FROM tran_date_cols
+ WHERE rank = 1
+ UNION ALL
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'MEAS' AS element_type, general_type, 40 + rank AS fingerprint_order
+ FROM numeric_cols_ranked
+ WHERE rank = 1
+),
+selected_tables AS (
+ SELECT profile_run_id, schema_name, table_name,
+ STRING_AGG(column_name, ',' ORDER BY element_type, fingerprint_order, column_name) AS column_names,
+ 'TO_VARCHAR(COUNT(*)) || ''|'' || ' ||
+ STRING_AGG(
+ REPLACE(
+ CASE
+ WHEN general_type = 'D' THEN 'TO_VARCHAR(MIN(@@@)) || ''|'' || TO_VARCHAR(MAX(@@@)) || ''|'' || TO_VARCHAR(COUNT(DISTINCT @@@))'
+ WHEN general_type = 'A' THEN 'TO_VARCHAR(MIN(@@@)) || ''|'' || TO_VARCHAR(MAX(@@@)) || ''|'' || TO_VARCHAR(COUNT(DISTINCT @@@)) || ''|'' || TO_VARCHAR(SUM(LENGTH(@@@)))'
+ WHEN general_type = 'N' THEN 'TO_VARCHAR(COUNT(@@@)) || ''|'' ||
+ TO_VARCHAR(COUNT(DISTINCT MOD(CAST(CAST(COALESCE(@@@,0) AS DECIMAL(38,6)) * 1000000 AS DECIMAL(38,0)), 1000003))) || ''|'' ||
+ COALESCE(TO_VARCHAR(CAST(MIN(@@@) AS DECIMAL(38,6))), '''') || ''|'' ||
+ COALESCE(TO_VARCHAR(CAST(MAX(@@@) AS DECIMAL(38,6))), '''') || ''|'' ||
+ COALESCE(TO_VARCHAR(MOD(COALESCE(SUM(MOD(CAST(ABS(COALESCE(@@@,0)) * 1000000 AS DECIMAL), 1000000007)), 0), 1000000007)), '''') || ''|'' ||
+ COALESCE(TO_VARCHAR(MOD(COALESCE(SUM(MOD(CAST(ABS(COALESCE(@@@,0)) * 1000000 AS DECIMAL), 1000000009)), 0), 1000000009)), '''')'
+ END,
+ '@@@', '"' || column_name || '"'
+ ),
+ ' || ''|'' || '
+ ORDER BY element_type, fingerprint_order, column_name
+ ) AS fingerprint
+ FROM combined
+ GROUP BY profile_run_id, schema_name, table_name
+)
+-- Insert tests for selected tables
+INSERT INTO test_definitions (
+ table_groups_id, test_suite_id, test_type,
+ schema_name, table_name, groupby_names,
+ test_active, last_auto_gen_date, profiling_as_of_date, profile_run_id,
+ history_calculation, history_lookback, custom_query
+)
+SELECT
+ :TABLE_GROUPS_ID ::UUID AS table_groups_id,
+ :TEST_SUITE_ID ::UUID AS test_suite_id,
+ 'Freshness_Trend' AS test_type,
+ s.schema_name,
+ s.table_name,
+ s.column_names AS groupby_names,
+ 'Y' AS test_active,
+ :RUN_DATE ::TIMESTAMP AS last_auto_gen_date,
+ :AS_OF_DATE ::TIMESTAMP AS profiling_as_of_date,
+ s.profile_run_id,
+ 'PREDICT' AS history_calculation,
+ NULL AS history_lookback,
+ s.fingerprint AS custom_query
+FROM selected_tables s
+ -- Only insert if test type is active
+WHERE EXISTS (SELECT 1 FROM test_types WHERE test_type = 'Freshness_Trend' AND active = 'Y')
+ -- Only insert if test type is included in generation set
+ AND EXISTS (SELECT 1 FROM generation_sets WHERE test_type = 'Freshness_Trend' AND generation_set = :GENERATION_SET)
+
+-- Match "uix_td_autogen_table" unique index exactly
+ON CONFLICT (test_suite_id, test_type, schema_name, table_name)
+WHERE last_auto_gen_date IS NOT NULL
+ AND table_name IS NOT NULL
+ AND column_name IS NULL
+
+-- Update tests if they already exist
+DO UPDATE SET
+ groupby_names = EXCLUDED.groupby_names,
+ test_active = EXCLUDED.test_active,
+ last_auto_gen_date = EXCLUDED.last_auto_gen_date,
+ profiling_as_of_date = EXCLUDED.profiling_as_of_date,
+ profile_run_id = EXCLUDED.profile_run_id,
+ history_calculation = EXCLUDED.history_calculation,
+ history_lookback = EXCLUDED.history_lookback,
+ custom_query = EXCLUDED.custom_query
+-- Ignore locked tests
+WHERE test_definitions.lock_refresh = 'N'
+ -- Don't update existing tests in "insert" mode
+ AND NOT COALESCE(:INSERT_ONLY, FALSE);
diff --git a/testgen/template/flavors/sap_hana/gen_query_tests/gen_Table_Freshness.sql b/testgen/template/flavors/sap_hana/gen_query_tests/gen_Table_Freshness.sql
new file mode 100644
index 00000000..d3cc765d
--- /dev/null
+++ b/testgen/template/flavors/sap_hana/gen_query_tests/gen_Table_Freshness.sql
@@ -0,0 +1,181 @@
+WITH latest_run AS (
+ -- Latest complete profiling run before as-of-date
+ SELECT MAX(run_date) AS last_run_date
+ FROM profile_results
+ WHERE table_groups_id = :TABLE_GROUPS_ID ::UUID
+ AND run_date::DATE <= :AS_OF_DATE ::DATE
+),
+latest_results AS (
+ -- Column results for latest run
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type,
+ distinct_value_ct, record_ct, null_value_ct,
+ max_value, min_value, avg_value, stdev_value
+ FROM profile_results p
+ INNER JOIN latest_run lr ON p.run_date = lr.last_run_date
+ WHERE table_groups_id = :TABLE_GROUPS_ID ::UUID
+),
+-- IDs - TOP 2
+id_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type, distinct_value_ct,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY
+ CASE
+ WHEN functional_data_type ILIKE 'ID-Unique%' THEN 1
+ WHEN functional_data_type = 'ID-Secondary' THEN 2
+ ELSE 3
+ END, distinct_value_ct DESC, column_name
+ ) AS rank
+ FROM latest_results
+ WHERE general_type IN ('A', 'D', 'N')
+ AND functional_data_type ILIKE 'ID%'
+),
+-- Process Date - TOP 1
+process_date_cols AS (
+    SELECT profile_run_id, schema_name, table_name, column_name,
+           functional_data_type, general_type, distinct_value_ct,
+           ROW_NUMBER() OVER (
+              PARTITION BY schema_name, table_name
+              ORDER BY
+                 CASE
+                    WHEN column_name ILIKE '%mod%' THEN 1
+                    WHEN column_name ILIKE '%up%' THEN 1
+                    WHEN column_name ILIKE '%cr%' THEN 2
+                    WHEN column_name ILIKE '%in%' THEN 2
+                    ELSE 3 END, distinct_value_ct DESC, column_name  -- explicit ELSE (matches id_cols); NULL would sort last anyway in Postgres ASC
+           ) AS rank
+      FROM latest_results
+     WHERE general_type IN ('A', 'D', 'N')
+       AND functional_data_type ILIKE 'process%'
+),
+-- Transaction Date - TOP 1
+tran_date_cols AS (
+    SELECT profile_run_id, schema_name, table_name, column_name,
+           functional_data_type, general_type, distinct_value_ct,
+           ROW_NUMBER() OVER (
+              PARTITION BY schema_name, table_name
+              ORDER BY distinct_value_ct DESC, column_name
+           ) AS rank
+      FROM latest_results
+     WHERE general_type IN ('A', 'D', 'N')  -- applies to all branches below
+       AND (functional_data_type ILIKE 'transactional date%'
+            OR functional_data_type ILIKE 'period%'
+            OR functional_data_type = 'timestamp')  -- parens required: AND binds tighter than OR
+),
+-- Numeric Measures
+numeric_cols AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ functional_data_type, general_type,
+ -- Weighted score
+ (
+ 0.25 * (distinct_value_ct * 1.0 / NULLIF(record_ct, 0)) +
+ 0.15 * ((max_value - min_value) / NULLIF(ABS(NULLIF(avg_value, 0)), 1)) +
+ 0.10 * (LEAST(1, LOG(GREATEST(distinct_value_ct, 2))) / LOG(GREATEST(record_ct, 2))) +
+ 0.40 * (stdev_value / NULLIF(ABS(NULLIF(avg_value, 0)), 1)) +
+ 0.10 * (1.0 - (null_value_ct * 1.0 / NULLIF(NULLIF(record_ct, 0), 1)))
+ ) AS change_detection_score
+ FROM latest_results
+ WHERE general_type = 'N'
+ AND (
+ functional_data_type ILIKE 'Measure%'
+ OR functional_data_type IN ('Sequence', 'Constant')
+ )
+),
+numeric_cols_ranked AS (
+ SELECT *,
+ ROW_NUMBER() OVER (
+ PARTITION BY schema_name, table_name
+ ORDER BY change_detection_score DESC, column_name
+ ) AS rank
+ FROM numeric_cols
+ WHERE change_detection_score IS NOT NULL
+),
+combined AS (
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'ID' AS element_type, general_type, 10 + rank AS fingerprint_order
+ FROM id_cols
+ WHERE rank <= 2
+ UNION ALL
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'DATE_P' AS element_type, general_type, 20 + rank AS fingerprint_order
+ FROM process_date_cols
+ WHERE rank = 1
+ UNION ALL
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'DATE_T' AS element_type, general_type, 30 + rank AS fingerprint_order
+ FROM tran_date_cols
+ WHERE rank = 1
+ UNION ALL
+ SELECT profile_run_id, schema_name, table_name, column_name,
+ 'MEAS' AS element_type, general_type, 40 + rank AS fingerprint_order
+ FROM numeric_cols_ranked
+ WHERE rank = 1
+),
+selected_tables AS (
+ SELECT profile_run_id, schema_name, table_name,
+ 'TO_VARCHAR(COUNT(*)) || ''|'' || ' ||
+ STRING_AGG(
+ REPLACE(
+ CASE
+ WHEN general_type = 'D' THEN 'TO_VARCHAR(MIN(@@@)) || ''|'' || TO_VARCHAR(MAX(@@@)) || ''|'' || TO_VARCHAR(COUNT(DISTINCT @@@))'
+ WHEN general_type = 'A' THEN 'TO_VARCHAR(MIN(@@@)) || ''|'' || TO_VARCHAR(MAX(@@@)) || ''|'' || TO_VARCHAR(COUNT(DISTINCT @@@)) || ''|'' || TO_VARCHAR(SUM(LENGTH(@@@)))'
+ WHEN general_type = 'N' THEN 'TO_VARCHAR(COUNT(@@@)) || ''|'' ||
+ TO_VARCHAR(COUNT(DISTINCT MOD(CAST(CAST(COALESCE(@@@,0) AS DECIMAL(38,6)) * 1000000 AS DECIMAL(38,0)), 1000003))) || ''|'' ||
+ COALESCE(TO_VARCHAR(CAST(MIN(@@@) AS DECIMAL(38,6))), '''') || ''|'' ||
+ COALESCE(TO_VARCHAR(CAST(MAX(@@@) AS DECIMAL(38,6))), '''') || ''|'' ||
+ COALESCE(TO_VARCHAR(MOD(COALESCE(SUM(MOD(CAST(ABS(COALESCE(@@@,0)) * 1000000 AS DECIMAL), 1000000007)), 0), 1000000007)), '''') || ''|'' ||
+ COALESCE(TO_VARCHAR(MOD(COALESCE(SUM(MOD(CAST(ABS(COALESCE(@@@,0)) * 1000000 AS DECIMAL), 1000000009)), 0), 1000000009)), '''')'
+ END,
+ '@@@', '"' || column_name || '"'
+ ),
+ ' || ''|'' || '
+ ORDER BY element_type, fingerprint_order, column_name
+ ) AS fingerprint
+ FROM combined
+ GROUP BY profile_run_id, schema_name, table_name
+)
+-- Insert tests for selected tables
+INSERT INTO test_definitions (
+ table_groups_id, test_suite_id, test_type,
+ schema_name, table_name,
+ test_active, last_auto_gen_date, profiling_as_of_date, profile_run_id,
+ history_calculation, history_lookback, custom_query
+)
+SELECT
+ :TABLE_GROUPS_ID ::UUID AS table_groups_id,
+ :TEST_SUITE_ID ::UUID AS test_suite_id,
+ 'Table_Freshness' AS test_type,
+ s.schema_name,
+ s.table_name,
+ 'Y' AS test_active,
+ :RUN_DATE ::TIMESTAMP AS last_auto_gen_date,
+ :AS_OF_DATE ::TIMESTAMP AS profiling_as_of_date,
+ s.profile_run_id,
+ 'Value' AS history_calculation,
+ 1 AS history_lookback,
+ s.fingerprint AS custom_query
+FROM selected_tables s
+ -- Only insert if test type is active
+WHERE EXISTS (SELECT 1 FROM test_types WHERE test_type = 'Table_Freshness' AND active = 'Y')
+ -- Only insert if test type is included in generation set
+ AND EXISTS (SELECT 1 FROM generation_sets WHERE test_type = 'Table_Freshness' AND generation_set = :GENERATION_SET)
+
+-- Match "uix_td_autogen_table" unique index exactly
+ON CONFLICT (test_suite_id, test_type, schema_name, table_name)
+WHERE last_auto_gen_date IS NOT NULL
+ AND table_name IS NOT NULL
+ AND column_name IS NULL
+
+-- Update tests if they already exist
+DO UPDATE SET
+ test_active = EXCLUDED.test_active,
+ last_auto_gen_date = EXCLUDED.last_auto_gen_date,
+ profiling_as_of_date = EXCLUDED.profiling_as_of_date,
+ profile_run_id = EXCLUDED.profile_run_id,
+ history_calculation = EXCLUDED.history_calculation,
+ history_lookback = EXCLUDED.history_lookback,
+ custom_query = EXCLUDED.custom_query
+-- Ignore locked tests
+WHERE test_definitions.lock_refresh = 'N';
diff --git a/testgen/template/flavors/sap_hana/profiling/project_profiling_query.sql b/testgen/template/flavors/sap_hana/profiling/project_profiling_query.sql
new file mode 100644
index 00000000..e80b0374
--- /dev/null
+++ b/testgen/template/flavors/sap_hana/profiling/project_profiling_query.sql
@@ -0,0 +1,252 @@
+-- TG-IF do_sample
+WITH target_table AS (
+ SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}" TABLESAMPLE BERNOULLI({SAMPLE_PERCENT_CALC})
+)
+-- TG-ELSE
+WITH target_table AS (
+ SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
+)
+-- TG-ENDIF
+SELECT
+ {CONNECTION_ID} AS connection_id,
+ '{PROJECT_CODE}' AS project_code,
+ '{TABLE_GROUPS_ID}' AS table_groups_id,
+ '{DATA_SCHEMA}' AS schema_name,
+ '{RUN_DATE}' AS run_date,
+ '{DATA_TABLE}' AS table_name,
+ {COL_POS} AS position,
+ '{COL_NAME_SANITIZED}' AS column_name,
+ '{COL_TYPE}' AS column_type,
+ '{DB_DATA_TYPE}' AS db_data_type,
+ '{COL_GEN_TYPE}' AS general_type,
+ COUNT(*) AS record_ct,
+-- TG-IF is_type_X
+ COUNT(CASE WHEN "{COL_NAME}" IS NOT NULL THEN 1 END) AS value_ct,
+ NULL AS distinct_value_ct,
+-- TG-ELSE
+ COUNT("{COL_NAME}") AS value_ct,
+ COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
+-- TG-ENDIF
+ SUM(CASE WHEN "{COL_NAME}" IS NULL THEN 1 ELSE 0 END) AS null_value_ct,
+-- TG-IF is_type_ADN
+ MIN(LENGTH(TO_VARCHAR("{COL_NAME}"))) AS min_length,
+ MAX(LENGTH(TO_VARCHAR("{COL_NAME}"))) AS max_length,
+ AVG(NULLIF(LENGTH(TO_VARCHAR("{COL_NAME}")), 0)) AS avg_length,
+-- TG-ELSE
+ NULL AS min_length,
+ NULL AS max_length,
+ NULL AS avg_length,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ SUM(CASE
+ WHEN TRIM("{COL_NAME}") LIKE_REGEXPR '^0(\.0*)?$' THEN 1 ELSE 0
+ END) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ SUM(1 - ABS(SIGN("{COL_NAME}"))) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_not_A_not_N
+ NULL AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ COUNT(DISTINCT UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COL_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', ''))) AS distinct_std_value_ct,
+ 0 AS zero_length_ct,
+ SUM(CASE
+ WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1
+ ELSE 0
+ END) AS lead_space_ct,
+ SUM(CASE WHEN "{COL_NAME}" LIKE '"%"' OR "{COL_NAME}" LIKE '''%''' THEN 1 ELSE 0 END) AS quoted_value_ct,
+ SUM(CASE WHEN "{COL_NAME}" LIKE_REGEXPR '[0-9]' THEN 1 ELSE 0 END) AS includes_digit_ct,
+ SUM(CASE
+ WHEN LOWER("{COL_NAME}") LIKE_REGEXPR '^(\.{1,}|-{1,}|\?{1,}|[[:space:]]{1,}|0{2,}|9{2,}|x{2,}|z{2,})$' THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
+ 'n/a','#na','none','null','unknown') THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
+ '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
+ '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
+ ELSE 0
+ END) AS filled_value_ct,
+ SUBSTR(MIN(CASE WHEN "{COL_NAME}" IS NOT NULL THEN "{COL_NAME}" END), 1, 100) AS min_text,
+ SUBSTR(MAX(CASE WHEN "{COL_NAME}" IS NOT NULL THEN "{COL_NAME}" END), 1, 100) AS max_text,
+ SUM(CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" <> LOWER("{COL_NAME}") THEN 1 ELSE 0 END) AS upper_case_ct,
+ SUM(CASE WHEN "{COL_NAME}" = LOWER("{COL_NAME}") AND "{COL_NAME}" <> UPPER("{COL_NAME}") THEN 1 ELSE 0 END) AS lower_case_ct,
+ SUM(CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" = LOWER("{COL_NAME}") THEN 1 ELSE 0 END) AS non_alpha_ct,
+ COUNT(CASE WHEN
+ REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COL_NAME}",
+ NCHAR(160), ''), NCHAR(8201), ''), NCHAR(8203), ''), NCHAR(8204), ''), NCHAR(8205), ''),
+ NCHAR(8206), ''), NCHAR(8207), ''), NCHAR(8239), ''), NCHAR(12288), ''), NCHAR(65279), '')
+ <> "{COL_NAME}" THEN 1 END) AS non_printing_ct,
+ SUM(<%IS_NUM;SUBSTR("{COL_NAME}", 1, 31)%>) AS numeric_ct,
+ SUM(<%IS_DATE;SUBSTR("{COL_NAME}", 1, 26)%>) AS date_ct,
+ CASE
+ WHEN SUM(CASE WHEN "{COL_NAME}" LIKE_REGEXPR '^[0-9]{1,5}[a-zA-Z]?[[:space:]][[:alnum:]_]{1,5}\.?[[:space:]]?[[:alnum:]_]*[[:space:]]?[[:alnum:]_]*[[:space:]][a-zA-Z]{1,6}\.?[[:space:]]?[0-9]{0,5}[A-Z]?$'
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.8 THEN 'STREET_ADDR'
+ WHEN SUM(CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.9 THEN 'STATE_USA'
+ WHEN SUM(CASE WHEN "{COL_NAME}" LIKE_REGEXPR '^(\+1|1)?[ .-]?(\([2-9][0-9]{2}\)|[2-9][0-9]{2})[ .-]?[2-9][0-9]{2}[ .-]?[0-9]{4}$'
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.8 THEN 'PHONE_USA'
+ WHEN SUM(CASE WHEN "{COL_NAME}" LIKE_REGEXPR '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'
+ AND "{COL_NAME}" NOT LIKE '%://%'
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.9 THEN 'EMAIL'
+ WHEN SUM(CASE WHEN REPLACE_REGEXPR('[0-9]' IN "{COL_NAME}" WITH '9') IN ('99999', '999999999', '99999-9999')
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.9 THEN 'ZIP_USA'
+ WHEN SUM(CASE WHEN "{COL_NAME}" LIKE_REGEXPR '^[[:alnum:]_[:space:]-]+\.(txt|csv|tsv|dat|doc|pdf|xlsx)$'
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.9 THEN 'FILE_NAME'
+ WHEN SUM(CASE WHEN "{COL_NAME}" LIKE_REGEXPR '^([0-9]{4}[- ]?){3}[0-9]{4}$'
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.8 THEN 'CREDIT_CARD'
+ WHEN SUM(CASE WHEN "{COL_NAME}" LIKE_REGEXPR '^([^,|' || CHAR(9) || ']{1,20}[,|' || CHAR(9) || ']){2,}[^,|' || CHAR(9) || ']{0,20}([,|' || CHAR(9) || ']?[^,|' || CHAR(9) || ']{0,20})*$'
+ AND NOT "{COL_NAME}" LIKE_REGEXPR '[[:space:]](and|but|or|yet)[[:space:]]'
+ THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.8 THEN 'DELIMITED_DATA'
+ WHEN SUM(CASE WHEN "{COL_NAME}" LIKE_REGEXPR '^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$'
+ AND SUBSTR("{COL_NAME}", 1, 3) NOT BETWEEN '734' AND '749'
+ AND SUBSTR("{COL_NAME}", 1, 3) <> '666' THEN 1 ELSE 0 END) / NULLIF(COUNT("{COL_NAME}"), 0) > 0.9 THEN 'SSN'
+ END AS std_pattern_match,
+-- TG-ELSE
+ NULL AS distinct_std_value_ct,
+ NULL AS zero_length_ct,
+ NULL AS lead_space_ct,
+ NULL AS quoted_value_ct,
+ NULL AS includes_digit_ct,
+ NULL AS filled_value_ct,
+ NULL AS min_text,
+ NULL AS max_text,
+ NULL AS upper_case_ct,
+ NULL AS lower_case_ct,
+ NULL AS non_alpha_ct,
+ NULL AS non_printing_ct,
+ NULL AS numeric_ct,
+ NULL AS date_ct,
+ NULL AS std_pattern_match,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT SUBSTR(STRING_AGG(formatted_pattern, ' | ' ORDER BY ct DESC), 1, 1000)
+ FROM (
+ SELECT TO_VARCHAR(COUNT(*)) || ' | ' || pattern AS formatted_pattern,
+ COUNT(*) AS ct
+ FROM (SELECT REPLACE_REGEXPR('[0-9]' IN REPLACE_REGEXPR('[A-Z]' IN REPLACE_REGEXPR('[a-z]' IN
+ "{COL_NAME}" WITH 'a') WITH 'A') WITH 'N') AS pattern
+ FROM target_table
+ WHERE "{COL_NAME}" IS NOT NULL AND "{COL_NAME}" > ' ' AND (SELECT MAX(LENGTH("{COL_NAME}"))
+ FROM target_table) BETWEEN 3 and {MAX_PATTERN_LENGTH}) p
+ GROUP BY pattern
+ HAVING pattern > ' '
+ ORDER BY COUNT(*) DESC
+ LIMIT 5
+ ) ps) AS top_patterns,
+-- TG-ELSE
+ NULL AS top_patterns,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ MIN("{COL_NAME}") AS min_value,
+ MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
+ MAX("{COL_NAME}") AS max_value,
+ AVG(CAST("{COL_NAME}" AS DECIMAL)) AS avg_value,
+ STDDEV(CAST("{COL_NAME}" AS DECIMAL)) AS stdev_value,
+ MIN(pct_25) AS percentile_25,
+ MIN(pct_50) AS percentile_50,
+ MIN(pct_75) AS percentile_75,
+-- TG-ELSE
+ NULL AS min_value,
+ NULL AS min_value_over_0,
+ NULL AS max_value,
+ NULL AS avg_value,
+ NULL AS stdev_value,
+ NULL AS percentile_25,
+ NULL AS percentile_50,
+ NULL AS percentile_75,
+-- TG-ENDIF
+-- TG-IF is_N_decimal
+ SUM(ROUND(ABS(MOD("{COL_NAME}", 1)), 5)) AS fractional_sum,
+-- TG-ELSE
+ NULL AS fractional_sum,
+-- TG-ENDIF
+-- TG-IF is_type_D
+ CASE
+ WHEN MIN("{COL_NAME}") IS NULL THEN NULL
+ ELSE GREATEST(MIN("{COL_NAME}"), TO_DATE('0001-01-01', 'YYYY-MM-DD'))
+ END AS min_date,
+ MAX("{COL_NAME}") AS max_date,
+ SUM(CASE
+ WHEN <%DATEDIFF_MONTH;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> > 12 THEN 1
+ ELSE 0
+ END) AS before_1yr_date_ct,
+ SUM(CASE
+ WHEN <%DATEDIFF_MONTH;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> > 60 THEN 1
+ ELSE 0
+ END) AS before_5yr_date_ct,
+ SUM(CASE
+ WHEN <%DATEDIFF_MONTH;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> > 240 THEN 1
+ ELSE 0
+ END) AS before_20yr_date_ct,
+ SUM(CASE
+ WHEN <%DATEDIFF_MONTH;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> > 1200 THEN 1
+ ELSE 0
+ END) AS before_100yr_date_ct,
+ SUM(CASE
+ WHEN <%DATEDIFF_DAY;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> BETWEEN 0 AND 365 THEN 1
+ ELSE 0
+ END) AS within_1yr_date_ct,
+ SUM(CASE
+ WHEN <%DATEDIFF_DAY;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%> BETWEEN 0 AND 30 THEN 1
+ ELSE 0
+ END) AS within_1mo_date_ct,
+ SUM(CASE
+ WHEN "{COL_NAME}" > TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS') THEN 1 ELSE 0
+ END) AS future_date_ct,
+ SUM(CASE
+ WHEN <%DATEDIFF_MONTH;TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS');"{COL_NAME}"%> > 240 THEN 1
+ ELSE 0
+ END) AS distant_future_date_ct,
+ COUNT(DISTINCT <%DATEDIFF_DAY;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>) AS date_days_present,
+ COUNT(DISTINCT <%DATEDIFF_WEEK;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>) AS date_weeks_present,
+ COUNT(DISTINCT <%DATEDIFF_MONTH;"{COL_NAME}";TO_DATE('{RUN_DATE}', 'YYYY-MM-DD HH24:MI:SS')%>) AS date_months_present,
+-- TG-ELSE
+ NULL AS min_date,
+ NULL AS max_date,
+ NULL AS before_1yr_date_ct,
+ NULL AS before_5yr_date_ct,
+ NULL AS before_20yr_date_ct,
+ NULL AS before_100yr_date_ct,
+ NULL AS within_1yr_date_ct,
+ NULL AS within_1mo_date_ct,
+ NULL AS future_date_ct,
+ NULL AS distant_future_date_ct,
+ NULL AS date_days_present,
+ NULL AS date_weeks_present,
+ NULL AS date_months_present,
+-- TG-ENDIF
+-- TG-IF is_type_B
+ SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
+-- TG-ELSE
+ NULL AS boolean_true_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT COUNT(DISTINCT REPLACE_REGEXPR('[0-9]' IN REPLACE_REGEXPR('[A-Z]' IN REPLACE_REGEXPR('[a-z]' IN
+ "{COL_NAME}" WITH 'a') WITH 'A') WITH 'N')
+ )
+ FROM target_table
+ WHERE "{COL_NAME}" IS NOT NULL AND "{COL_NAME}" > ' ') AS distinct_pattern_ct,
+ SUM(SIGN(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REPLACE(TRIM("{COL_NAME}"), ' ', '')))) AS embedded_space_ct,
+ AVG(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REPLACE(TRIM("{COL_NAME}"), ' ', ''))) AS avg_embedded_spaces,
+-- TG-ELSE
+ NULL AS distinct_pattern_ct,
+ NULL AS embedded_space_ct,
+ NULL AS avg_embedded_spaces,
+-- TG-ENDIF
+ '{PROFILE_RUN_ID}' AS profile_run_id
+ FROM target_table
+-- TG-IF is_N_sampling
+ , (SELECT
+ PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_25,
+ PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_50,
+ PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_75
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" TABLESAMPLE BERNOULLI({SAMPLE_PERCENT_CALC}) LIMIT 1000000) pctile
+-- TG-ENDIF
+-- TG-IF is_N_no_sampling
+ , (SELECT
+ PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_25,
+ PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_50,
+ PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_75
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" LIMIT 1000000) pctile
+-- TG-ENDIF
diff --git a/testgen/template/flavors/sap_hana/profiling/project_secondary_profiling_query.sql b/testgen/template/flavors/sap_hana/profiling/project_secondary_profiling_query.sql
new file mode 100644
index 00000000..c2593cb4
--- /dev/null
+++ b/testgen/template/flavors/sap_hana/profiling/project_secondary_profiling_query.sql
@@ -0,0 +1,36 @@
+-- Get Freqs for selected columns
+WITH ranked_vals AS (
+ SELECT "{COL_NAME}",
+ COUNT(*) AS ct,
+ ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC, "{COL_NAME}") AS rn
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
+-- TG-IF do_sample_bool
+ TABLESAMPLE BERNOULLI({SAMPLE_PERCENT_CALC})
+-- TG-ENDIF
+ WHERE "{COL_NAME}" IS NOT NULL AND "{COL_NAME}" > ' '
+ GROUP BY "{COL_NAME}"
+),
+consol_vals AS (
+ SELECT COALESCE(CASE WHEN rn <= 10 THEN '| ' || "{COL_NAME}" || ' | ' || TO_VARCHAR(ct)
+ ELSE NULL
+ END, '| Other Values (' || TO_VARCHAR(COUNT(DISTINCT "{COL_NAME}")) || ') | ' || TO_VARCHAR(SUM(ct))) AS val,
+ MIN(rn) as min_rn
+ FROM ranked_vals
+ GROUP BY CASE WHEN rn <= 10 THEN '| ' || "{COL_NAME}" || ' | ' || TO_VARCHAR(ct)
+ ELSE NULL
+ END
+)
+SELECT '{PROJECT_CODE}' as project_code,
+ '{DATA_SCHEMA}' as schema_name,
+ '{RUN_DATE}' as run_date,
+ '{DATA_TABLE}' as table_name,
+ '{COL_NAME}' as column_name,
+ REPLACE(STRING_AGG(val, '^#^' ORDER BY min_rn), '^#^', CHAR(10)) AS top_freq_values,
+ (SELECT LOWER(BINTOHEX(HASH_MD5(TO_BINARY(STRING_AGG("{COL_NAME}", '|' ORDER BY "{COL_NAME}")))))
+ FROM (SELECT DISTINCT "{COL_NAME}"
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
+-- TG-IF do_sample_bool
+ TABLESAMPLE BERNOULLI({SAMPLE_PERCENT_CALC})
+-- TG-ENDIF
+ WHERE "{COL_NAME}" IS NOT NULL AND "{COL_NAME}" > ' ')) as distinct_value_hash
+ FROM consol_vals
diff --git a/testgen/template/flavors/sap_hana/profiling/templated_functions.yaml b/testgen/template/flavors/sap_hana/profiling/templated_functions.yaml
new file mode 100644
index 00000000..dbc2c73f
--- /dev/null
+++ b/testgen/template/flavors/sap_hana/profiling/templated_functions.yaml
@@ -0,0 +1,108 @@
+DATEDIFF_DAY: DAYS_BETWEEN(CAST({$1} AS DATE), CAST({$2} AS DATE))
+
+DATEDIFF_WEEK: (DAYS_BETWEEN(ADD_DAYS(CAST({$1} AS DATE), -WEEKDAY(CAST({$1} AS DATE))), ADD_DAYS(CAST({$2} AS DATE), -WEEKDAY(CAST({$2} AS DATE))))) / 7
+
+DATEDIFF_MONTH: ((YEAR({$2}) * 12 + MONTH({$2})) - (YEAR({$1}) * 12 + MONTH({$1})))
+
+DATEDIFF_QUARTER: FLOOR(((YEAR({$2}) * 12 + MONTH({$2})) - (YEAR({$1}) * 12 + MONTH({$1}))) / 3)
+
+DATEDIFF_YEAR: YEAR({$2}) - YEAR({$1})
+
+IS_NUM: CASE
+ WHEN {$1} LIKE_REGEXPR '^[[:space:]]*[+-]?\$?[[:space:]]*[0-9]+(,[0-9]{3})*(\.[0-9]*)?[%]?[[:space:]]*$' THEN 1
+ ELSE 0
+ END
+
+IS_DATE: CASE
+ /* YYYY-MM-DD HH:MM:SS SSSSSS or YYYY-MM-DD HH:MM:SS */
+ WHEN {$1} LIKE_REGEXPR '^([0-9]{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])[[:space:]](2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])([[:space:]][0-9]{6})?$'
+ THEN CASE
+ WHEN TO_INTEGER(SUBSTR({$1}, 1, 4)) BETWEEN 1800 AND 2200
+ AND (
+ ( SUBSTR({$1}, 6, 2) IN ('01', '03', '05', '07', '08',
+ '10', '12')
+ AND TO_INTEGER(SUBSTR({$1}, 9, 2)) BETWEEN 1 AND 31 )
+ OR ( SUBSTR({$1}, 6, 2) IN ('04', '06', '09')
+ AND TO_INTEGER(SUBSTR({$1}, 9, 2)) BETWEEN 1 AND 30 )
+ OR ( SUBSTR({$1}, 6, 2) = '02'
+ AND TO_INTEGER(SUBSTR({$1}, 9, 2)) BETWEEN 1 AND 29)
+ )
+ THEN 1
+ ELSE 0
+ END
+   /* YYYYMMDDHHMMSSSSSS or YYYYMMDDHH (second regex has no minutes group — confirm whether YYYYMMDDHHMM was intended) */
+ WHEN {$1} LIKE_REGEXPR '^([0-9]{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])([0-5][0-9])([0-5][0-9])([0-9]{6})$'
+ OR {$1} LIKE_REGEXPR '^([0-9]{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])$'
+ THEN CASE
+ WHEN TO_INTEGER(SUBSTR({$1}, 1, 4)) BETWEEN 1800 AND 2200
+ AND (
+ ( SUBSTR({$1}, 5, 2) IN ('01', '03', '05', '07', '08',
+ '10', '12')
+ AND TO_INTEGER(SUBSTR({$1}, 7, 2)) BETWEEN 1 AND 31 )
+ OR ( SUBSTR({$1}, 5, 2) IN ('04', '06', '09')
+ AND TO_INTEGER(SUBSTR({$1}, 7, 2)) BETWEEN 1 AND 30 )
+ OR ( SUBSTR({$1}, 5, 2) = '02'
+ AND TO_INTEGER(SUBSTR({$1}, 7, 2)) BETWEEN 1 AND 29)
+ )
+ THEN 1
+ ELSE 0
+ END
+ /* Exclude anything else long */
+ WHEN LENGTH({$1}) > 11 THEN 0
+ /* YYYY-MMM/MM-DD */
+ WHEN REPLACE_REGEXPR('(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)' IN UPPER({$1}) WITH '12') LIKE_REGEXPR
+ '[12][09][0-9][0-9]-[0-1]?[0-9]-[0-3]?[0-9]'
+ THEN CASE
+ WHEN TO_INTEGER(SUBSTR_REGEXPR('^[^-]+' IN {$1})) BETWEEN 1800 AND 2200
+ AND (
+ ( UPPER(SUBSTR_REGEXPR('[^-]+' IN {$1} OCCURRENCE 2)) IN ('01', '03', '05', '07', '08',
+ '1', '3', '5', '7', '8', '10', '12',
+ 'JAN', 'MAR', 'MAY', 'JUL', 'AUG',
+ 'OCT', 'DEC')
+ AND TO_INTEGER(SUBSTR_REGEXPR('[^-]+$' IN {$1})) BETWEEN 1 AND 31 )
+ OR ( UPPER(SUBSTR_REGEXPR('[^-]+' IN {$1} OCCURRENCE 2)) IN ('04', '06', '09', '4', '6', '9', '11',
+ 'APR', 'JUN', 'SEP', 'NOV')
+ AND TO_INTEGER(SUBSTR_REGEXPR('[^-]+$' IN {$1})) BETWEEN 1 AND 30 )
+ OR ( UPPER(SUBSTR_REGEXPR('[^-]+' IN {$1} OCCURRENCE 2)) IN ('02', '2', 'FEB')
+ AND TO_INTEGER(SUBSTR_REGEXPR('[^-]+$' IN {$1})) BETWEEN 1 AND 29)
+ )
+ THEN 1
+ ELSE 0
+ END
+ /* MM/-DD/-YY/YYYY */
+ WHEN REPLACE({$1}, '-', '/') LIKE_REGEXPR '^[0-1]?[0-9]/[0-3]?[0-9]/[12][09][0-9][0-9]$'
+ OR REPLACE({$1}, '-', '/') LIKE_REGEXPR '^[0-1]?[0-9]/[0-3]?[0-9]/[0-9][0-9]$'
+ THEN
+ CASE
+ WHEN TO_INTEGER(SUBSTR_REGEXPR('[^/]+' IN REPLACE({$1}, '-', '/') OCCURRENCE 1)) BETWEEN 1 AND 12
+ AND (
+ ( TO_INTEGER(SUBSTR_REGEXPR('[^/]+' IN REPLACE({$1}, '-', '/') OCCURRENCE 1)) IN (1, 3, 5, 7, 8, 10, 12)
+ AND TO_INTEGER(SUBSTR_REGEXPR('[^/]+' IN REPLACE({$1}, '-', '/') OCCURRENCE 2)) BETWEEN 1 AND 31 )
+ OR ( TO_INTEGER(SUBSTR_REGEXPR('[^/]+' IN REPLACE({$1}, '-', '/') OCCURRENCE 1)) IN (4, 6, 9, 11)
+ AND TO_INTEGER(SUBSTR_REGEXPR('[^/]+' IN REPLACE({$1}, '-', '/') OCCURRENCE 2)) BETWEEN 1 AND 30 )
+ OR ( TO_INTEGER(SUBSTR_REGEXPR('[^/]+' IN REPLACE({$1}, '-', '/') OCCURRENCE 1)) = 2
+ AND TO_INTEGER(SUBSTR_REGEXPR('[^/]+' IN REPLACE({$1}, '-', '/') OCCURRENCE 2)) BETWEEN 1 AND 29)
+ )
+ AND
+ TO_INTEGER('20' || SUBSTR(SUBSTR_REGEXPR('[^/]+$' IN REPLACE({$1}, '-', '/')), -2)) BETWEEN 1800 AND 2200
+ THEN 1
+ ELSE 0
+ END
+ /* DD-MMM-YYYY */
+ WHEN UPPER({$1}) LIKE_REGEXPR '[0-3]?[0-9]-(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-[12][09][0-9][0-9]'
+ THEN
+ CASE
+ WHEN TO_INTEGER(SUBSTR_REGEXPR('[^-]+$' IN {$1})) BETWEEN 1800 AND 2200
+ AND (
+ ( UPPER(SUBSTR_REGEXPR('[^-]+' IN {$1} OCCURRENCE 2)) IN ('JAN', 'MAR', 'MAY', 'JUL', 'AUG', 'OCT', 'DEC')
+ AND TO_INTEGER(SUBSTR_REGEXPR('^[^-]+' IN {$1})) BETWEEN 1 AND 31 )
+ OR ( UPPER(SUBSTR_REGEXPR('[^-]+' IN {$1} OCCURRENCE 2)) IN ('APR', 'JUN', 'SEP', 'NOV')
+ AND TO_INTEGER(SUBSTR_REGEXPR('^[^-]+' IN {$1})) BETWEEN 1 AND 30 )
+ OR ( UPPER(SUBSTR_REGEXPR('[^-]+' IN {$1} OCCURRENCE 2)) = 'FEB'
+ AND TO_INTEGER(SUBSTR_REGEXPR('^[^-]+' IN {$1})) BETWEEN 1 AND 29)
+ )
+ THEN 1
+ ELSE 0
+ END
+ ELSE 0
+ END
diff --git a/testgen/template/flavors/sap_hana/validate_tests/get_target_identifiers.sql b/testgen/template/flavors/sap_hana/validate_tests/get_target_identifiers.sql
new file mode 100644
index 00000000..e75b8fc9
--- /dev/null
+++ b/testgen/template/flavors/sap_hana/validate_tests/get_target_identifiers.sql
@@ -0,0 +1,5 @@
+SELECT SCHEMA_NAME AS schema_name,
+ TABLE_NAME AS table_name,
+ COLUMN_NAME AS column_name
+FROM SYS.TABLE_COLUMNS
+WHERE SCHEMA_NAME IN ({TEST_SCHEMAS})
diff --git a/testgen/template/flavors/snowflake/profiling/project_profiling_query.sql b/testgen/template/flavors/snowflake/profiling/project_profiling_query.sql
new file mode 100644
index 00000000..35adb40f
--- /dev/null
+++ b/testgen/template/flavors/snowflake/profiling/project_profiling_query.sql
@@ -0,0 +1,222 @@
+WITH target_table AS (
+-- TG-IF do_sample
+ SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}" SAMPLE ({SAMPLE_SIZE} rows)
+-- TG-ELSE
+ SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
+-- TG-ENDIF
+)
+SELECT
+ {CONNECTION_ID} AS connection_id,
+ '{PROJECT_CODE}' AS project_code,
+ '{TABLE_GROUPS_ID}' AS table_groups_id,
+ '{DATA_SCHEMA}' AS schema_name,
+ '{RUN_DATE}' AS run_date,
+ '{DATA_TABLE}' AS table_name,
+ {COL_POS} AS position,
+ '{COL_NAME_SANITIZED}' AS column_name,
+ '{COL_TYPE}' AS column_type,
+ '{DB_DATA_TYPE}' AS db_data_type,
+ '{COL_GEN_TYPE}' AS general_type,
+ COUNT(*) AS record_ct,
+ COUNT("{COL_NAME}") AS value_ct,
+ COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
+ SUM(NVL2("{COL_NAME}", 0, 1)) AS null_value_ct,
+-- TG-IF is_type_ADN
+ MIN(LEN("{COL_NAME}")) AS min_length,
+ MAX(LEN("{COL_NAME}")) AS max_length,
+ AVG(NULLIF(LEN("{COL_NAME}"), 0)::FLOAT) AS avg_length,
+-- TG-ELSE
+ NULL AS min_length,
+ NULL AS max_length,
+ NULL AS avg_length,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ COUNT(CASE
+ WHEN REGEXP_LIKE(TRIM("{COL_NAME}"::VARCHAR), '^0(\.0*)?$') THEN 1
+ END) AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ SUM( 1 - ABS(SIGN("{COL_NAME}")) )::BIGINT AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_not_A_not_N
+ NULL AS zero_value_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) AS distinct_std_value_ct,
+ COUNT(CASE
+ WHEN "{COL_NAME}" = '' THEN 1
+ END) AS zero_length_ct,
+ COUNT( CASE
+ WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1
+ END) AS lead_space_ct,
+ COUNT(CASE WHEN "{COL_NAME}"::VARCHAR ILIKE '"%"' OR "{COL_NAME}"::VARCHAR ILIKE '''%''' THEN 1 END) AS quoted_value_ct,
+ COUNT(CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '.*[0-9].*') THEN 1 END) AS includes_digit_ct,
+ COUNT(CASE
+ WHEN LOWER("{COL_NAME}"::VARCHAR) REGEXP '\\.{1,}' OR LOWER("{COL_NAME}"::VARCHAR) REGEXP '-{1,}'
+ OR LOWER("{COL_NAME}"::VARCHAR) REGEXP '\\?{1,}' OR LOWER("{COL_NAME}"::VARCHAR) REGEXP '\\s{1,}'
+ OR LOWER("{COL_NAME}"::VARCHAR) REGEXP '0{2,}' OR LOWER("{COL_NAME}"::VARCHAR) REGEXP '9{2,}'
+ OR LOWER("{COL_NAME}"::VARCHAR) REGEXP 'x{2,}' OR LOWER("{COL_NAME}"::VARCHAR) REGEXP 'z{2,}' THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
+ 'n/a','#na','none','null','unknown') THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
+ '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
+ WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
+ '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
+ END) AS filled_value_ct,
+ LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text,
+ LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text,
+ COUNT(CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" <> LOWER("{COL_NAME}") THEN 1 END) AS upper_case_ct,
+ COUNT(CASE WHEN "{COL_NAME}" = LOWER("{COL_NAME}") AND "{COL_NAME}" <> UPPER("{COL_NAME}") THEN 1 END) AS lower_case_ct,
+ COUNT(CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" = LOWER("{COL_NAME}") THEN 1 END) AS non_alpha_ct,
+ COUNT(CASE WHEN TRANSLATE("{COL_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), 'XXXXXXXXXX') <> "{COL_NAME}" THEN 1 END) AS non_printing_ct,
+ SUM(<%IS_NUM;LEFT("{COL_NAME}", 31)%>) AS numeric_ct,
+ SUM(<%IS_DATE;LEFT("{COL_NAME}", 26)%>) AS date_ct,
+ CASE
+ WHEN CAST(SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$')
+ THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'STREET_ADDR'
+ WHEN CAST(SUM(CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
+ THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'STATE_USA'
+ WHEN CAST(SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^(\\+1|1)?[ .-]?(\\([2-9][0-9]{2}\\)|[2-9][0-9]{2})[ .-]?[2-9][0-9]{2}[ .-]?[0-9]{4}$')
+ THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'PHONE_USA'
+ WHEN CAST(SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$')
+ AND "{COL_NAME}"::VARCHAR NOT LIKE '%://%'
+ THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'EMAIL'
+ WHEN CAST(SUM(CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
+ THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'ZIP_USA'
+ WHEN CAST(SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^[\\w\\s\-]+\\.(txt|csv|tsv|dat|doc|pdf|xlsx)$')
+ THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'FILE_NAME'
+ WHEN CAST(SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^([0-9]{4}[- ]?){3}[0-9]{4}$')
+ THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'CREDIT_CARD'
+ WHEN CAST(SUM(CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$')
+ AND NOT REGEXP_LIKE("{COL_NAME}"::VARCHAR, '.*\\s(and|but|or|yet)\\s.*')
+ THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'DELIMITED_DATA'
+ WHEN SUM (CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$')
+ AND LEFT("{COL_NAME}", 3) NOT BETWEEN '734' AND '749'
+ AND LEFT("{COL_NAME}", 3) <> '666' THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'SSN'
+ END AS std_pattern_match,
+-- TG-ELSE
+ NULL AS distinct_std_value_ct,
+ NULL AS zero_length_ct,
+ NULL AS lead_space_ct,
+ NULL AS quoted_value_ct,
+ NULL AS includes_digit_ct,
+ NULL AS filled_value_ct,
+ NULL AS min_text,
+ NULL AS max_text,
+ NULL AS upper_case_ct,
+ NULL AS lower_case_ct,
+ NULL AS non_alpha_ct,
+ NULL AS non_printing_ct,
+ NULL AS numeric_ct,
+ NULL AS date_ct,
+ NULL AS std_pattern_match,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT LEFT(LISTAGG(pattern, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1000) AS concat_pats
+ FROM (
+ SELECT TOP 5 CAST(COUNT(*) AS VARCHAR(40)) || ' | ' || pattern AS pattern,
+ COUNT(*) AS ct
+ FROM (SELECT REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE(
+ "{COL_NAME}"::VARCHAR, '[a-z]', 'a'),
+ '[A-Z]', 'A'),
+ '[0-9]', 'N') AS pattern
+ FROM target_table
+ WHERE "{COL_NAME}" > ' ' AND (SELECT MAX(LEN("{COL_NAME}"))
+ FROM target_table) BETWEEN 3 and {MAX_PATTERN_LENGTH}) p
+ GROUP BY pattern
+ HAVING pattern > ' '
+ ORDER BY COUNT(*) DESC) AS ps) AS top_patterns,
+-- TG-ELSE
+ NULL AS top_patterns,
+-- TG-ENDIF
+-- TG-IF is_type_N
+ MIN("{COL_NAME}") AS min_value,
+ MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
+ MAX("{COL_NAME}") AS max_value,
+ AVG(CAST("{COL_NAME}" AS FLOAT)) AS avg_value,
+ STDDEV(CAST("{COL_NAME}" AS FLOAT)) AS stdev_value,
+ MIN(pct_25) AS percentile_25,
+ MIN(pct_50) AS percentile_50,
+ MIN(pct_75) AS percentile_75,
+-- TG-ELSE
+ NULL AS min_value,
+ NULL AS min_value_over_0,
+ NULL AS max_value,
+ NULL AS avg_value,
+ NULL AS stdev_value,
+ NULL AS percentile_25,
+ NULL AS percentile_50,
+ NULL AS percentile_75,
+-- TG-ENDIF
+-- TG-IF is_N_decimal
+ SUM(ROUND(ABS(MOD("{COL_NAME}", 1)), 5)) AS fractional_sum,
+-- TG-ELSE
+ NULL AS fractional_sum,
+-- TG-ENDIF
+-- TG-IF is_type_D
+ GREATEST(MIN("{COL_NAME}"), '0001-01-01') AS min_date,
+ MAX("{COL_NAME}") AS max_date,
+ COUNT(CASE WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 12 THEN 1 END) AS before_1yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 60 THEN 1 END) AS before_5yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 240 THEN 1 END) AS before_20yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 1200 THEN 1 END) AS before_100yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1 END) AS within_1yr_date_ct,
+ COUNT(CASE WHEN DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 30 THEN 1 END) AS within_1mo_date_ct,
+ COUNT(CASE WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 END) AS future_date_ct,
+ COUNT(CASE WHEN DATEDIFF('MON', '{RUN_DATE}', "{COL_NAME}") > 240 THEN 1 END) AS distant_future_date_ct,
+ COUNT(DISTINCT DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}')) AS date_days_present,
+ COUNT(DISTINCT <%DATEDIFF_WEEK;"{COL_NAME}";'{RUN_DATE}'%>) AS date_weeks_present,
+ COUNT(DISTINCT DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}')) AS date_months_present,
+-- TG-ELSE
+ NULL AS min_date,
+ NULL AS max_date,
+ NULL AS before_1yr_date_ct,
+ NULL AS before_5yr_date_ct,
+ NULL AS before_20yr_date_ct,
+ NULL AS before_100yr_date_ct,
+ NULL AS within_1yr_date_ct,
+ NULL AS within_1mo_date_ct,
+ NULL AS future_date_ct,
+ NULL AS distant_future_date_ct,
+ NULL AS date_days_present,
+ NULL AS date_weeks_present,
+ NULL AS date_months_present,
+-- TG-ENDIF
+-- TG-IF is_type_B
+ SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
+-- TG-ELSE
+ NULL AS boolean_true_ct,
+-- TG-ENDIF
+-- TG-IF is_type_A
+ (SELECT COUNT(DISTINCT REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
+ "{COL_NAME}"::VARCHAR, '[a-z]', 'a'),
+ '[A-Z]', 'A'),
+ '[0-9]', 'N')
+ ) AS pattern_ct
+ FROM target_table
+ WHERE "{COL_NAME}" > ' ') AS distinct_pattern_ct,
+ SUM(SIGN(REGEXP_COUNT(TRIM("{COL_NAME}"::VARCHAR), ' '))::BIGINT) AS embedded_space_ct,
+ AVG(REGEXP_COUNT(TRIM("{COL_NAME}"::VARCHAR), ' ')::FLOAT) AS avg_embedded_spaces,
+-- TG-ELSE
+ NULL AS distinct_pattern_ct,
+ NULL AS embedded_space_ct,
+ NULL AS avg_embedded_spaces,
+-- TG-ENDIF
+ '{PROFILE_RUN_ID}' AS profile_run_id
+ FROM target_table
+-- TG-IF is_N_sampling
+ ,
+ (SELECT
+ PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
+ PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
+ PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" SAMPLE ({SAMPLE_SIZE} rows) LIMIT 1 ) pctile
+-- TG-ENDIF
+-- TG-IF is_N_no_sampling
+ ,
+ (SELECT
+ PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
+ PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
+ PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
+ FROM "{DATA_SCHEMA}"."{DATA_TABLE}" LIMIT 1) pctile
+-- TG-ENDIF
diff --git a/testgen/template/flavors/snowflake/profiling/project_profiling_query.yaml b/testgen/template/flavors/snowflake/profiling/project_profiling_query.yaml
deleted file mode 100644
index 5c04fce8..00000000
--- a/testgen/template/flavors/snowflake/profiling/project_profiling_query.yaml
+++ /dev/null
@@ -1,211 +0,0 @@
----
-01_sampling: |
- WITH target_table AS (
- SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}" SAMPLE ({SAMPLE_SIZE} rows)
- )
- SELECT
-01_else: |
- WITH target_table AS (
- SELECT * FROM "{DATA_SCHEMA}"."{DATA_TABLE}"
- )
- SELECT
-01_all: |
- {CONNECTION_ID} as connection_id,
- '{PROJECT_CODE}' as project_code,
- '{TABLE_GROUPS_ID}' as table_groups_id,
- '{DATA_SCHEMA}' AS schema_name,
- '{RUN_DATE}' AS run_date,
- '{DATA_TABLE}' AS table_name,
- {COL_POS} AS position,
- '{COL_NAME_SANITIZED}' AS column_name,
- '{COL_TYPE}' AS column_type,
- '{DB_DATA_TYPE}' AS db_data_type,
- '{COL_GEN_TYPE}' AS general_type,
- COUNT(*) AS record_ct,
-
-02_X: |
- COUNT("{COL_NAME}") AS value_ct,
- COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
- SUM(NVL2("{COL_NAME}", 0, 1)) AS null_value_ct,
-02_else: |
- COUNT("{COL_NAME}") AS value_ct,
- COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
- SUM(NVL2("{COL_NAME}", 0, 1)) AS null_value_ct,
-
-03_ADN: MIN(LEN("{COL_NAME}")) AS min_length,
- MAX(LEN("{COL_NAME}")) AS max_length,
- AVG(NULLIF(LEN("{COL_NAME}"), 0)::FLOAT) AS avg_length,
-03_else: NULL as min_length,
- NULL as max_length,
- NULL as avg_length,
-
-04_A: COUNT(CASE
- WHEN REGEXP_LIKE(TRIM("{COL_NAME}"::VARCHAR), '^0(\.0*)?$') THEN 1
- END) AS zero_value_ct,
-04_N: SUM( 1 - ABS(SIGN("{COL_NAME}")) )::BIGINT AS zero_value_ct,
-04_else: NULL as zero_value_ct,
-
-05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) as distinct_std_value_ct,
- COUNT(CASE
- WHEN "{COL_NAME}" = '' THEN 1
- END) AS zero_length_ct,
- COUNT( CASE
- WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1
- END ) AS lead_space_ct,
- COUNT( CASE WHEN "{COL_NAME}"::VARCHAR ILIKE '"%"' OR "{COL_NAME}"::VARCHAR ILIKE '''%''' THEN 1 END ) as quoted_value_ct,
- COUNT( CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '.*[0-9].*') THEN 1 END ) as includes_digit_ct,
- COUNT( CASE
- WHEN "{COL_NAME}" IN ('.', '?', ' ') THEN 1
- WHEN LOWER("{COL_NAME}"::VARCHAR) REGEXP '-{2,}' OR LOWER("{COL_NAME}"::VARCHAR) REGEXP '0{2,}' OR LOWER("{COL_NAME}"::VARCHAR) REGEXP '9{2,}'
- OR LOWER("{COL_NAME}"::VARCHAR) REGEXP 'x{2,}' OR LOWER("{COL_NAME}"::VARCHAR) REGEXP 'z{2,}' THEN 1
- WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
- 'n/a','#na','none','null','unknown') THEN 1
- WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
- '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
- WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
- '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
- END ) AS filled_value_ct,
- LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text,
- LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text,
- COUNT( CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" <> LOWER("{COL_NAME}") THEN 1 END) AS upper_case_ct,
- COUNT( CASE WHEN "{COL_NAME}" = LOWER("{COL_NAME}") AND "{COL_NAME}" <> UPPER("{COL_NAME}") THEN 1 END) AS lower_case_ct,
- COUNT( CASE WHEN "{COL_NAME}" = UPPER("{COL_NAME}") AND "{COL_NAME}" = LOWER("{COL_NAME}") THEN 1 END) AS non_alpha_ct,
- COUNT( CASE WHEN TRANSLATE("{COL_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), 'XXXXXXXXXX') <> "{COL_NAME}" THEN 1 END) as non_printing_ct,
- SUM(<%IS_NUM;LEFT("{COL_NAME}", 31)%>) AS numeric_ct,
- SUM(<%IS_DATE;LEFT("{COL_NAME}", 26)%>) AS date_ct,
- CASE
- WHEN CAST(SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$')
- THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'STREET_ADDR'
- WHEN CAST(SUM(CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
- THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'STATE_USA'
- WHEN CAST(SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^(\\+1|1)?[ .-]?(\\([2-9][0-9]{2}\\)|[2-9][0-9]{2})[ .-]?[2-9][0-9]{2}[ .-]?[0-9]{4}$')
- THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'PHONE_USA'
- WHEN CAST(SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$')
- THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'EMAIL'
- WHEN CAST(SUM( CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
- THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'ZIP_USA'
- WHEN CAST(SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^[\\w\\s\-]+\\.(txt|csv|tsv|dat|doc|pdf|xlsx)$')
- THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.9 THEN 'FILE_NAME'
- WHEN CAST(SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^([0-9]{4}[- ]?){3}[0-9]{4}$')
- THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'CREDIT_CARD'
- WHEN CAST(SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$')
- AND NOT REGEXP_LIKE("{COL_NAME}"::VARCHAR, '.*\\s(and|but|or|yet)\\s.*')
- THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'DELIMITED_DATA'
- WHEN SUM ( CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$')
- AND LEFT("{COL_NAME}", 3) NOT BETWEEN '734' AND '749'
- AND LEFT("{COL_NAME}", 3) <> '666' THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'SSN'
- END as std_pattern_match,
-05_else: NULL as distinct_std_value_ct,
- NULL as zero_length_ct,
- NULL as lead_space_ct,
- NULL as quoted_value_ct,
- NULL as includes_digit_ct,
- NULL as filled_value_ct,
- NULL as min_text,
- NULL as max_text,
- NULL as upper_case_ct,
- NULL as lower_case_ct,
- NULL as non_alpha_ct,
- NULL as non_printing_ct,
- NULL as numeric_ct,
- NULL as date_ct,
- NULL as std_pattern_match,
-
-06_A: ( SELECT LEFT(LISTAGG(pattern, ' | ') WITHIN GROUP (ORDER BY ct DESC), 1000) AS concat_pats
- FROM (
- SELECT TOP 5 CAST(COUNT(*) AS VARCHAR(40)) || ' | ' || pattern AS pattern,
- COUNT(*) AS ct
- FROM ( SELECT REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE(
- "{COL_NAME}"::VARCHAR, '[a-z]', 'a'),
- '[A-Z]', 'A'),
- '[0-9]', 'N') AS pattern
- FROM target_table
- WHERE "{COL_NAME}" > ' ' AND (SELECT MAX(LEN("{COL_NAME}"))
- FROM target_table) BETWEEN 3 and {MAX_PATTERN_LENGTH}) p
- GROUP BY pattern
- HAVING pattern > ' '
- ORDER BY COUNT(*) DESC) as ps) AS top_patterns,
-06_else: NULL as top_patterns,
-
-08_N: MIN("{COL_NAME}") AS min_value,
- MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
- MAX("{COL_NAME}") AS max_value,
- AVG(CAST("{COL_NAME}" AS FLOAT)) AS avg_value,
- STDDEV(CAST("{COL_NAME}" AS FLOAT)) AS stdev_value,
- MIN(pct_25) as percentile_25,
- MIN(pct_50) as percentile_50,
- MIN(pct_75) as percentile_75,
-08_else: NULL as min_value,
- NULL as min_value_over_0,
- NULL as max_value,
- NULL as avg_value,
- NULL as stdev_value,
- NULL as percentile_25,
- NULL as percentile_50,
- NULL as percentile_75,
-
-10_N_dec: SUM(ROUND(ABS(MOD("{COL_NAME}", 1)), 5)) as fractional_sum,
-10_else: NULL as fractional_sum,
-
-11_D: GREATEST(MIN("{COL_NAME}"), '0001-01-01') as min_date,
- MAX("{COL_NAME}") as max_date,
- COUNT( CASE WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 12 THEN 1 END) AS before_1yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 60 THEN 1 END) AS before_5yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 240 THEN 1 END) AS before_20yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 1200 THEN 1 END) AS before_100yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1 END) AS within_1yr_date_ct,
- COUNT( CASE WHEN DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 30 THEN 1 END) AS within_1mo_date_ct,
- COUNT( CASE WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 END) AS future_date_ct,
- COUNT( CASE WHEN DATEDIFF('MON', '{RUN_DATE}', "{COL_NAME}") > 240 THEN 1 END) AS distant_future_date_ct,
- COUNT(DISTINCT DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}' ) ) as date_days_present,
- COUNT(DISTINCT DATEDIFF(week, "{COL_NAME}", '{RUN_DATE}' ) ) as date_weeks_present,
- COUNT(DISTINCT DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}' ) ) as date_months_present,
-11_else: NULL as min_date,
- NULL as max_date,
- NULL as before_1yr_date_ct,
- NULL as before_5yr_date_ct,
- NULL as before_20yr_date_ct,
- NULL AS before_100yr_date_ct,
- NULL as within_1yr_date_ct,
- NULL as within_1mo_date_ct,
- NULL as future_date_ct,
- NULL as distant_future_date_ct,
- NULL as date_days_present,
- NULL as date_weeks_present,
- NULL as date_months_present,
-
-12_B: SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
-12_else: NULL as boolean_true_ct,
-
-14_A: ( SELECT COUNT(DISTINCT REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE(
- "{COL_NAME}"::VARCHAR, '[a-z]', 'a'),
- '[A-Z]', 'A'),
- '[0-9]', 'N')
- ) AS pattern_ct
- FROM target_table
- WHERE "{COL_NAME}" > ' ' ) AS distinct_pattern_ct,
- SUM(SIGN(REGEXP_COUNT(TRIM("{COL_NAME}"::VARCHAR), ' '))::BIGINT) AS embedded_space_ct,
- AVG(REGEXP_COUNT(TRIM("{COL_NAME}"::VARCHAR), ' ')::FLOAT) AS avg_embedded_spaces,
-14_else: NULL as distinct_pattern_ct,
- NULL as embedded_space_ct,
- NULL as avg_embedded_spaces,
-
-16_all: " '{PROFILE_RUN_ID}' as profile_run_id "
-
-98_all: ' FROM target_table '
-
-99_N: |
- ,
- (SELECT
- PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
- PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
- PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
- FROM "{DATA_SCHEMA}"."{DATA_TABLE}" LIMIT 1) pctile
-99_N_sampling: |
- ,
- (SELECT
- PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_25,
- PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_50,
- PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") OVER () AS pct_75
- FROM "{DATA_SCHEMA}"."{DATA_TABLE}" SAMPLE ({SAMPLE_SIZE} rows) LIMIT 1 ) pctile
-99_else: ;
diff --git a/testgen/template/flavors/snowflake/profiling/templated_functions.yaml b/testgen/template/flavors/snowflake/profiling/templated_functions.yaml
index 1afbdea3..c9316784 100644
--- a/testgen/template/flavors/snowflake/profiling/templated_functions.yaml
+++ b/testgen/template/flavors/snowflake/profiling/templated_functions.yaml
@@ -53,3 +53,4 @@ IS_DATE: CASE
ELSE 0
END
+DATEDIFF_WEEK: (DATEADD('day', 1 - DAYOFWEEKISO({$2}::DATE), {$2}::DATE)::DATE - DATEADD('day', 1 - DAYOFWEEKISO({$1}::DATE), {$1}::DATE)::DATE) / 7
diff --git a/testgen/template/gen_funny_cat_tests/gen_Constant.sql b/testgen/template/gen_funny_cat_tests/gen_Constant.sql
index 4a0af8d6..4c66729e 100644
--- a/testgen/template/gen_funny_cat_tests/gen_Constant.sql
+++ b/testgen/template/gen_funny_cat_tests/gen_Constant.sql
@@ -10,7 +10,14 @@ latest_results AS (
SELECT p.*
FROM profile_results p
INNER JOIN latest_run lr ON p.run_date = lr.last_run_date
+ LEFT JOIN data_column_chars dcc ON (
+ p.table_groups_id = dcc.table_groups_id
+ AND p.schema_name = dcc.schema_name
+ AND p.table_name = dcc.table_name
+ AND p.column_name = dcc.column_name
+ )
WHERE p.table_groups_id = :TABLE_GROUPS_ID ::UUID
+ AND dcc.excluded_data_element IS NOT TRUE
),
all_runs AS (
SELECT DISTINCT table_groups_id, run_date,
@@ -42,7 +49,14 @@ selected_columns AS (
rr.table_groups_id = p.table_groups_id
AND rr.run_date = p.run_date
)
+ LEFT JOIN data_column_chars dcc ON (
+ p.table_groups_id = dcc.table_groups_id
+ AND p.schema_name = dcc.schema_name
+ AND p.table_name = dcc.table_name
+ AND p.column_name = dcc.column_name
+ )
WHERE p.table_groups_id = :TABLE_GROUPS_ID ::UUID
+ AND dcc.excluded_data_element IS NOT TRUE
-- No dates as constants
AND NOT (p.general_type = 'D' AND rr.run_rank = 1)
GROUP BY p.schema_name, p.table_name, p.column_name
diff --git a/testgen/template/gen_funny_cat_tests/gen_Distinct_Value_Ct.sql b/testgen/template/gen_funny_cat_tests/gen_Distinct_Value_Ct.sql
index c06b458a..a7c186f2 100644
--- a/testgen/template/gen_funny_cat_tests/gen_Distinct_Value_Ct.sql
+++ b/testgen/template/gen_funny_cat_tests/gen_Distinct_Value_Ct.sql
@@ -13,7 +13,14 @@ latest_results AS (
SELECT p.*
FROM profile_results p
INNER JOIN latest_run lr ON p.run_date = lr.last_run_date
+ LEFT JOIN data_column_chars dcc ON (
+ p.table_groups_id = dcc.table_groups_id
+ AND p.schema_name = dcc.schema_name
+ AND p.table_name = dcc.table_name
+ AND p.column_name = dcc.column_name
+ )
WHERE p.table_groups_id = :TABLE_GROUPS_ID ::UUID
+ AND dcc.excluded_data_element IS NOT TRUE
),
all_runs AS (
SELECT DISTINCT table_groups_id, run_date,
@@ -45,7 +52,14 @@ selected_columns AS (
rr.table_groups_id = p.table_groups_id
AND rr.run_date = p.run_date
)
+ LEFT JOIN data_column_chars dcc ON (
+ p.table_groups_id = dcc.table_groups_id
+ AND p.schema_name = dcc.schema_name
+ AND p.table_name = dcc.table_name
+ AND p.column_name = dcc.column_name
+ )
WHERE p.table_groups_id = :TABLE_GROUPS_ID ::UUID
+ AND dcc.excluded_data_element IS NOT TRUE
GROUP BY p.schema_name, p.table_name, p.column_name
HAVING SUM(CASE WHEN p.distinct_value_ct = 1 THEN 0 ELSE 1 END) = 0
AND (
diff --git a/testgen/template/generation/gen_selection_tests.sql b/testgen/template/generation/gen_selection_tests.sql
index c6b846dd..ca85796f 100644
--- a/testgen/template/generation/gen_selection_tests.sql
+++ b/testgen/template/generation/gen_selection_tests.sql
@@ -11,6 +11,14 @@ selected_columns AS (
FROM profile_results p
INNER JOIN latest_run lr ON p.run_date = lr.last_run_date
WHERE p.table_groups_id = :TABLE_GROUPS_ID ::UUID
+ AND NOT EXISTS (
+ SELECT 1 FROM data_column_chars dcc
+ WHERE dcc.table_groups_id = p.table_groups_id
+ AND dcc.schema_name = p.schema_name
+ AND dcc.table_name = p.table_name
+ AND dcc.column_name = p.column_name
+ AND dcc.excluded_data_element IS TRUE
+ )
AND {SELECTION_CRITERIA}
)
INSERT INTO test_definitions (
diff --git a/testgen/template/profiling/functional_datatype.sql b/testgen/template/profiling/functional_datatype.sql
index e3c66599..7b7832d2 100644
--- a/testgen/template/profiling/functional_datatype.sql
+++ b/testgen/template/profiling/functional_datatype.sql
@@ -491,6 +491,9 @@ SET functional_data_type =
WHEN general_type='N'
AND ( column_type ILIKE '%int%'
OR
+ (SPLIT_PART(column_type, ',', 2) > ''
+ AND RTRIM(SPLIT_PART(column_type, ',', 2), ' )') = '0')
+ OR
(RTRIM(SPLIT_PART(column_type, ',', 2), ')') > '0'
AND fractional_sum = 0) -- 0 implies integer; null is float or non-numeric
) THEN 'Measurement Discrete'
diff --git a/testgen/template/profiling/pii_flag_update.sql b/testgen/template/profiling/pii_flag_update.sql
new file mode 100644
index 00000000..644122f2
--- /dev/null
+++ b/testgen/template/profiling/pii_flag_update.sql
@@ -0,0 +1,18 @@
+-- Propagate pii_flag from profile_results to data_column_chars
+-- Clears existing flags first, then sets flags from the latest profiling run
+UPDATE data_column_chars
+ SET pii_flag = NULL
+ WHERE table_groups_id = :TABLE_GROUPS_ID;
+
+WITH pii_selects
+ AS ( SELECT table_groups_id, schema_name, table_name, column_name, pii_flag
+ FROM profile_results
+ WHERE profile_run_id = :PROFILE_RUN_ID
+ AND pii_flag IS NOT NULL )
+UPDATE data_column_chars
+ SET pii_flag = pii_selects.pii_flag
+ FROM pii_selects
+ WHERE data_column_chars.table_groups_id = pii_selects.table_groups_id
+ AND data_column_chars.schema_name = pii_selects.schema_name
+ AND data_column_chars.table_name = pii_selects.table_name
+ AND data_column_chars.column_name = pii_selects.column_name;
diff --git a/testgen/template/score_cards/get_score_card_issues_by_column.sql b/testgen/template/score_cards/get_score_card_issues_by_column.sql
index c0e9724e..c2955a5f 100644
--- a/testgen/template/score_cards/get_score_card_issues_by_column.sql
+++ b/testgen/template/score_cards/get_score_card_issues_by_column.sql
@@ -14,6 +14,8 @@ anomalies AS (
types.anomaly_name AS type,
types.issue_likelihood AS status,
results.detail,
+ types.detail_redactable,
+ dcc.pii_flag,
EXTRACT(
EPOCH
FROM runs.profiling_starttime
@@ -24,6 +26,12 @@ anomalies AS (
FROM profile_anomaly_results AS results
INNER JOIN profile_anomaly_types AS types ON (types.id = results.anomaly_id)
INNER JOIN profiling_runs AS runs ON (runs.id = results.profile_run_id)
+ LEFT JOIN data_column_chars AS dcc ON (
+ results.table_groups_id = dcc.table_groups_id
+ AND results.schema_name = dcc.schema_name
+ AND results.table_name = dcc.table_name
+ AND results.column_name = dcc.column_name
+ )
INNER JOIN score_profiling_runs ON (
score_profiling_runs.profile_run_id = runs.id
AND score_profiling_runs.table_name = results.table_name
@@ -47,6 +55,8 @@ tests AS (
test_types.test_name_short AS type,
result_status AS status,
result_message AS detail,
+ NULL::BOOLEAN AS detail_redactable,
+ NULL AS pii_flag,
EXTRACT(
EPOCH
FROM test_time
diff --git a/testgen/template/score_cards/get_score_card_issues_by_dimension.sql b/testgen/template/score_cards/get_score_card_issues_by_dimension.sql
index 8afb5d85..74830695 100644
--- a/testgen/template/score_cards/get_score_card_issues_by_dimension.sql
+++ b/testgen/template/score_cards/get_score_card_issues_by_dimension.sql
@@ -14,6 +14,8 @@ anomalies AS (
types.anomaly_name AS type,
types.issue_likelihood AS status,
results.detail,
+ types.detail_redactable,
+ dcc.pii_flag,
EXTRACT(
EPOCH
FROM runs.profiling_starttime
@@ -24,6 +26,12 @@ anomalies AS (
FROM profile_anomaly_results AS results
INNER JOIN profile_anomaly_types AS types ON (types.id = results.anomaly_id)
INNER JOIN profiling_runs AS runs ON (runs.id = results.profile_run_id)
+ LEFT JOIN data_column_chars AS dcc ON (
+ results.table_groups_id = dcc.table_groups_id
+ AND results.schema_name = dcc.schema_name
+ AND results.table_name = dcc.table_name
+ AND results.column_name = dcc.column_name
+ )
INNER JOIN score_profiling_runs ON (
score_profiling_runs.profile_run_id = runs.id
AND score_profiling_runs.table_name = results.table_name
@@ -48,6 +56,8 @@ tests AS (
test_types.test_name_short AS type,
result_status AS status,
result_message AS detail,
+ NULL::BOOLEAN AS detail_redactable,
+ NULL AS pii_flag,
EXTRACT(
EPOCH
FROM test_time
diff --git a/testgen/ui/app.py b/testgen/ui/app.py
index 9358938c..5ed2bc72 100644
--- a/testgen/ui/app.py
+++ b/testgen/ui/app.py
@@ -1,11 +1,12 @@
import logging
+from urllib.parse import urlparse
import streamlit as st
from testgen import settings
from testgen.common import version_service
from testgen.common.docker_service import check_basic_configuration
-from testgen.common.models import with_database_session
+from testgen.common.models import get_current_session, with_database_session
from testgen.common.models.project import Project
from testgen.ui import bootstrap
from testgen.ui.assets import get_asset_path
@@ -16,50 +17,74 @@
@with_database_session
def render(log_level: int = logging.INFO):
- st.set_page_config(
- page_title="TestGen",
- page_icon=get_asset_path("favicon.ico"),
- layout="wide",
- # Collapse when logging out because the sidebar takes some time to be removed from the DOM
- # Collapse for Catalog role since they only have access to one page
- initial_sidebar_state="collapsed"
- if session.auth and (session.auth.logging_out or (session.auth.is_logged_in and not session.auth.user_has_permission("view")))
- else "auto",
- )
-
- application = get_application(log_level=log_level)
- application.logger.debug("Starting Streamlit re-run")
- if not session.auth:
- session.auth = application.auth_class()
-
- status_ok, message = check_basic_configuration()
- if not status_ok:
- st.markdown(f":red[{message}]")
- return
-
- set_locale()
-
- session.sidebar_project = (
- session.page_args_pending_router and session.page_args_pending_router.get("project_code")
- ) or st.query_params.get("project_code", session.sidebar_project)
-
- if not session.auth.is_logged_in and not session.auth.logging_out:
- session.auth.load_user_session()
-
- application.logo.render()
-
- if session.auth.is_logged_in and not session.auth.logging_in:
- with st.sidebar:
- testgen.sidebar(
- projects=Project.select_where(),
- current_project=session.sidebar_project,
- menu=application.menu,
- current_page=session.current_page,
- version=version_service.get_version(),
- support_email=settings.SUPPORT_EMAIL,
- )
-
- application.router.run()
+ try:
+ st.set_page_config(
+ page_title="TestGen",
+ page_icon=get_asset_path("favicon.ico"),
+ layout="wide",
+ # Collapse when logging out or on the no-project page (no sidebar content on either)
+ initial_sidebar_state="collapsed"
+ if (session.auth and session.auth.logging_out) or session.current_page == "no-project"
+ else "auto",
+ )
+
+ application = get_application(log_level=log_level)
+ application.logger.debug("Starting Streamlit re-run")
+ if not session.auth:
+ session.auth = application.auth_class()
+
+ status_ok, message = check_basic_configuration()
+ if not status_ok:
+ st.markdown(f":red[{message}]")
+ return
+
+ set_locale()
+
+ if session.auth.logging_out:
+ session.sidebar_project = None
+ else:
+ session.sidebar_project = (
+ session.page_args_pending_router and session.page_args_pending_router.get("project_code")
+ ) or st.query_params.get("project_code", session.sidebar_project)
+
+ if not session.auth.is_logged_in and not session.auth.logging_out:
+ session.auth.load_user_session()
+
+ if session.auth.is_logged_in and not session.auth.logging_out:
+ session.auth.load_user_role()
+
+ application.logo.render()
+
+ if session.auth.is_logged_in and not session.auth.logging_in and not session.auth.logging_out:
+ current_page = session.current_page
+ if not current_page:
+ try:
+ current_page = urlparse(st.context.url).path.lstrip("/")
+ except Exception:
+ current_page = ""
+ is_global_context = current_page in application.global_admin_paths
+ if current_page != "no-project":
+ with st.sidebar:
+ testgen.sidebar(
+ projects=[] if is_global_context else [
+ p for p in Project.select_where() if session.auth.user_has_project_access(p.project_code)
+ ],
+ current_project=None if is_global_context else session.sidebar_project,
+ menu=application.menu,
+ current_page=session.current_page,
+ version=version_service.get_version(),
+ support_email=settings.SUPPORT_EMAIL,
+ global_context=is_global_context,
+ is_global_admin=session.auth.user_has_permission("global_admin") and bool(application.global_admin_paths),
+ )
+
+ application.router.run()
+ finally:
+ # Safety net: commit any flushed-but-uncommitted work (e.g., PersistedSetting writes)
+ # before RerunException propagates and bypasses database_session()'s normal commit.
+ db_session = get_current_session()
+ if db_session:
+ db_session.commit()
@st.cache_resource(validate=lambda _: not settings.IS_DEBUG, show_spinner=False)
diff --git a/testgen/ui/assets/flavors/oracle.svg b/testgen/ui/assets/flavors/oracle.svg
new file mode 100644
index 00000000..eef50c67
--- /dev/null
+++ b/testgen/ui/assets/flavors/oracle.svg
@@ -0,0 +1,58 @@
+
+
+
+
+
+Oracle
+
+
+Oracle
diff --git a/testgen/ui/assets/flavors/sap_hana.svg b/testgen/ui/assets/flavors/sap_hana.svg
new file mode 100644
index 00000000..446764d2
--- /dev/null
+++ b/testgen/ui/assets/flavors/sap_hana.svg
@@ -0,0 +1,66 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/testgen/ui/auth.py b/testgen/ui/auth.py
index 14706465..9518f8b7 100644
--- a/testgen/ui/auth.py
+++ b/testgen/ui/auth.py
@@ -1,20 +1,19 @@
-import base64
import logging
-from datetime import UTC, datetime
from typing import Literal
import extra_streamlit_components as stx
-import jwt
import streamlit as st
-from testgen import settings
+from testgen.common.auth import decode_jwt_token, get_jwt_signing_key
from testgen.common.mixpanel_service import MixpanelService
+from testgen.common.models.project_membership import ProjectMembership, RoleType
from testgen.common.models.user import User
from testgen.ui.services.javascript_service import execute_javascript
+from testgen.ui.session import session
LOG = logging.getLogger("testgen")
-Permission = Literal["catalog", "view", "disposition", "edit", "administer"]
+Permission = Literal["catalog", "view", "disposition", "view_pii", "edit", "administer", "global_admin"]
class Authentication:
@@ -23,6 +22,7 @@ class Authentication:
jwt_cookie_expiry_days = 1
user: User | None = None
+ role: RoleType | None = None
# Intermediate state holders because auth cookie changes are not immediate
cookies_ready: bool = False
@@ -38,16 +38,22 @@ def user_display(self) -> str | None:
return (self.user.name or self.user.username) if self.user else None
@property
- def default_page(self) -> str | None:
+ def current_project(self) -> str | None:
+ return session.sidebar_project
+
+ def get_default_page(self, project_code: str | None = None) -> str: # noqa: ARG002
return "project-dashboard" if self.user else ""
- def user_has_permission(self, _permission: Permission) -> bool:
+ def user_has_permission(self, permission: Permission, /, project_code: str | None = None) -> bool: # noqa: ARG002
+ return True
+
+ def user_has_project_access(self, project_code: str) -> bool: # noqa: ARG002
return True
def get_jwt_hashing_key(self) -> bytes:
try:
- return base64.b64decode(settings.JWT_HASHING_KEY_B64.encode("ascii"))
- except Exception as e:
+ return get_jwt_signing_key()
+ except Exception:
st.error(
"Error reading the JWT signing key from settings.\n\n Make sure you have a valid "
"base64 string assigned to the TG_JWT_HASHING_KEY environment variable."
@@ -67,22 +73,31 @@ def get_credentials(self):
def login_user(self, username: str) -> None:
self.user = User.get(username)
self.user.save(update_latest_login=True)
- MixpanelService().send_event("login", include_usage=True, role=self.user.role)
+ self.load_user_role()
+ MixpanelService().send_event("login", include_usage=True, role=self.role)
def load_user_session(self) -> None:
cookies = self._load_cookies()
token = cookies.get(self.jwt_cookie_name)
if token is not None:
try:
- token = jwt.decode(token, self.get_jwt_hashing_key(), algorithms=["HS256"])
- if token["exp_date"] > datetime.now(UTC).timestamp():
- self.user = User.get(token["username"])
+ payload = decode_jwt_token(token)
+ self.user = User.get(payload["username"])
+ self.load_user_role()
except Exception:
LOG.debug("Invalid auth token found on cookies", exc_info=True, stack_info=True)
+ def load_user_role(self) -> None:
+ if self.user and self.current_project:
+ membership = ProjectMembership.get_by_user_and_project(self.user.id, self.current_project)
+ self.role = membership.role if membership else None
+ else:
+ self.role = None
+
def end_user_session(self) -> None:
self._clear_jwt_cookie()
self.user = None
+ self.role = None
def _clear_jwt_cookie(self) -> None:
execute_javascript(
diff --git a/testgen/ui/bootstrap.py b/testgen/ui/bootstrap.py
index b21cf6a2..52eb2b47 100644
--- a/testgen/ui/bootstrap.py
+++ b/testgen/ui/bootstrap.py
@@ -50,12 +50,13 @@
class Application(singleton.Singleton):
- def __init__(self, auth_class: Authentication, logo: plugins.Logo, router: Router, menu: Menu, logger: logging.Logger) -> None:
+ def __init__(self, auth_class: Authentication, logo: plugins.Logo, router: Router, menu: Menu, logger: logging.Logger, global_admin_paths: frozenset[str]) -> None:
self.auth_class = auth_class
self.logo = logo
self.router = router
self.menu = menu
self.logger = logger
+ self.global_admin_paths = global_admin_paths
def run(log_level: int = logging.INFO) -> Application:
@@ -77,10 +78,9 @@ def run(log_level: int = logging.INFO) -> Application:
logo_class = plugins.Logo
for plugin in installed_plugins:
- spec = plugin.load()
+ spec = plugin.load_streamlit()
- if spec.page:
- pages.append(spec.page)
+ pages.extend(spec.pages)
if spec.auth:
auth_class = spec.auth
@@ -104,4 +104,5 @@ def run(log_level: int = logging.INFO) -> Application:
),
),
logger=LOG,
+ global_admin_paths=frozenset(page.path for page in pages if page.permission == "global_admin"),
)
diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css
index 8390aafe..f3550307 100644
--- a/testgen/ui/components/frontend/css/shared.css
+++ b/testgen/ui/components/frontend/css/shared.css
@@ -226,10 +226,26 @@ body {
color: var(--error-color);
}
+.text-warning {
+ color: var(--orange);
+}
+
.text-green {
color: var(--primary-color);
}
+.text-purple {
+ color: var(--purple);
+}
+
+.text-orange {
+ color: var(--orange);
+}
+
+.text-brown {
+ color: var(--brown);
+}
+
.text-capitalize {
text-transform: capitalize;
}
@@ -744,7 +760,3 @@ input::-ms-clear {
.notifications--empty.tg-empty-state {
margin-top: 0;
}
-
-.warning-text {
- color: var(--orange);
-}
diff --git a/testgen/ui/components/frontend/js/components/attribute.js b/testgen/ui/components/frontend/js/components/attribute.js
index 61240f7f..a7bb60eb 100644
--- a/testgen/ui/components/frontend/js/components/attribute.js
+++ b/testgen/ui/components/frontend/js/components/attribute.js
@@ -8,11 +8,12 @@
* @property {string?} class
*/
import { getValue, loadStylesheet } from '../utils.js';
+import { PII_REDACTED } from '../display_utils.js';
import { Icon } from './icon.js';
import { withTooltip } from './tooltip.js';
import van from '../van.min.js';
-const { div } = van.tags;
+const { div, code } = van.tags;
const Attribute = (/** @type Properties */ props) => {
loadStylesheet('attribute', stylesheet);
@@ -33,6 +34,12 @@ const Attribute = (/** @type Properties */ props) => {
{ class: 'attribute-value' },
() => {
const value = getValue(props.value);
+ if (value === PII_REDACTED) {
+ return withTooltip(
+ code({ class: 'attribute-pii-redacted' }, 'PII Redacted'),
+ { text: 'You do not have permission to view PII data', position: 'top-right' },
+ );
+ }
return (value || value === 0) ? value : '--';
},
),
@@ -44,6 +51,16 @@ stylesheet.replace(`
.attribute-value {
word-wrap: break-word;
}
+
+.attribute-pii-redacted {
+ display: inline-block;
+ font-size: 12px;
+ padding: 2px 6px;
+ border-radius: 4px;
+ background: color-mix(in srgb, var(--disabled-text-color) 15%, transparent);
+ color: var(--disabled-text-color);
+ overflow: visible;
+}
`);
export { Attribute };
diff --git a/testgen/ui/components/frontend/js/components/connection_form.js b/testgen/ui/components/frontend/js/components/connection_form.js
index 011e425a..53100d97 100644
--- a/testgen/ui/components/frontend/js/components/connection_form.js
+++ b/testgen/ui/components/frontend/js/components/connection_form.js
@@ -83,6 +83,8 @@ const defaultPorts = {
postgresql: '5432',
snowflake: '443',
databricks: '443',
+ oracle: '1521',
+ sap_hana: '39015',
};
/**
@@ -234,6 +236,27 @@ const ConnectionForm = (props, saveButton) => {
connection,
dynamicConnectionUrl,
),
+ oracle: () => OracleForm(
+ updatedConnection,
+ getValue(props.flavors).find(f => f.value === connectionFlavor.rawVal),
+ (formValue, isValid) => {
+ updatedConnection.val = {...updatedConnection.val, ...formValue};
+ setFieldValidity('oracle_form', isValid);
+ },
+ connection,
+ dynamicConnectionUrl,
+ { dbNameLabel: 'Service Name' },
+ ),
+ sap_hana: () => OracleForm(
+ updatedConnection,
+ getValue(props.flavors).find(f => f.value === connectionFlavor.rawVal),
+ (formValue, isValid) => {
+ updatedConnection.val = {...updatedConnection.val, ...formValue};
+ setFieldValidity('sap_hana_form', isValid);
+ },
+ connection,
+ dynamicConnectionUrl,
+ ),
bigquery: () => BigqueryForm(
updatedConnection,
getValue(props.flavors).find(f => f.value === connectionFlavor.rawVal),
@@ -380,6 +403,7 @@ const ConnectionForm = (props, saveButton) => {
* @param {(params: Partial, isValid: boolean) => void} onChange
* @param {Connection?} originalConnection
* @param {VanState} dynamicConnectionUrl
+ * @param {{dbNameLabel: string}?} options
* @returns {HTMLElement}
*/
const RedshiftForm = (
@@ -388,6 +412,7 @@ const RedshiftForm = (
onChange,
originalConnection,
dynamicConnectionUrl,
+ options,
) => {
const isValid = van.state(true);
const connectByUrl = van.state(connection.rawVal.connect_by_url ?? false);
@@ -479,7 +504,7 @@ const RedshiftForm = (
),
Input({
name: 'db_name',
- label: 'Database',
+ label: options?.dbNameLabel || 'Database',
value: connectionDatabase,
disabled: connectByUrl,
onChange: (value, state) => {
@@ -552,6 +577,8 @@ const RedshiftSpectrumForm = RedshiftForm;
const PostgresqlForm = RedshiftForm;
+const OracleForm = RedshiftForm;
+
const AzureMSSQLForm = (
connection,
flavor,
@@ -766,10 +793,11 @@ const DatabricksForm = (
) => {
const isValid = van.state(true);
const connectByUrl = van.state(connection.rawVal?.connect_by_url ?? false);
+ const useOAuth = van.state(connection.rawVal?.connect_by_key ?? false);
const connectionHost = van.state(connection.rawVal?.project_host ?? '');
const connectionPort = van.state(connection.rawVal?.project_port || defaultPorts[flavor.flavor]);
const connectionHttpPath = van.state(connection.rawVal?.http_path ?? '');
- const connectionDatabase = van.state(connection.rawVal?.project_db ?? '');
+ const connectionCatalog = van.state(connection.rawVal?.project_db ?? '');
const connectionUsername = van.state(connection.rawVal?.project_user ?? '');
const connectionPassword = van.state(connection.rawVal?.project_pw_encrypted ?? '');
const connectionUrl = van.state(connection.rawVal?.url ?? '');
@@ -780,13 +808,13 @@ const DatabricksForm = (
onChange({
project_host: connectionHost.val,
project_port: connectionPort.val,
- project_db: connectionDatabase.val,
- project_user: connectionUsername.val,
+ project_db: connectionCatalog.val,
+ project_user: useOAuth.val ? connectionUsername.val : 'token',
project_pw_encrypted: connectionPassword.val,
http_path: connectionHttpPath.val,
connect_by_url: connectByUrl.val,
url: connectByUrl.val ? connectionUrl.val : connectionUrl.rawVal,
- connect_by_key: false,
+ connect_by_key: useOAuth.val,
}, isValid.val);
});
@@ -803,7 +831,7 @@ const DatabricksForm = (
{ class: 'flex-column border border-radius-1 p-3 mt-1 fx-gap-1', style: 'position: relative;' },
Caption({content: 'Server', style: 'position: absolute; top: -10px; background: var(--app-background-color); padding: 0px 8px;' }),
- RadioGroup({
+ () => useOAuth.val ? div() : RadioGroup({
label: 'Connect by',
options: [
{
@@ -868,16 +896,16 @@
},
validators: [
requiredIf(() => !connectByUrl.val),
- maxLength(50),
+ maxLength(200),
],
}),
Input({
name: 'db_name',
- label: 'Database',
- value: connectionDatabase,
+ label: 'Catalog',
+ value: connectionCatalog,
disabled: connectByUrl,
onChange: (value, state) => {
- connectionDatabase.val = value;
+ connectionCatalog.val = value;
validityPerField['db_name'] = state.valid;
isValid.val = Object.values(validityPerField).every(v => v);
},
@@ -906,38 +935,84 @@ const DatabricksForm = (
}),
),
),
-
div(
{ class: 'flex-column border border-radius-1 p-3 mt-1 fx-gap-1', style: 'position: relative;' },
Caption({content: 'Authentication', style: 'position: absolute; top: -10px; background: var(--app-background-color); padding: 0px 8px;' }),
- Input({
- name: 'db_user',
- label: 'Username',
- value: connectionUsername,
- onChange: (value, state) => {
- connectionUsername.val = value;
- validityPerField['db_user'] = state.valid;
- isValid.val = Object.values(validityPerField).every(v => v);
- },
- validators: [
- required,
- maxLength(50),
+ RadioGroup({
+ label: 'Authentication method',
+ options: [
+ {label: 'Access Token (PAT)', value: false},
+ {label: 'Service Principal (OAuth)', value: true},
],
- }),
- Input({
- name: 'password',
- label: 'Password',
- value: connectionPassword,
- type: 'password',
- passwordSuggestions: false,
- placeholder: (originalConnection?.connection_id && originalConnection?.project_pw_encrypted) ? secretsPlaceholder : '',
- onChange: (value, state) => {
- connectionPassword.val = value;
- validityPerField['password'] = state.valid;
+ value: useOAuth,
+ onChange: (value) => {
+ useOAuth.val = value;
+ connectionPassword.val = '';
+ delete validityPerField['password'];
+ if (value) {
+ connectByUrl.val = false;
+ delete validityPerField['db_user'];
+ }
isValid.val = Object.values(validityPerField).every(v => v);
},
+ layout: 'inline',
}),
+
+ () => {
+ if (useOAuth.val) {
+ return div(
+ { class: 'flex-column fx-gap-3' },
+ Input({
+ name: 'db_user',
+ label: 'Client ID',
+ value: connectionUsername,
+ onChange: (value, state) => {
+ connectionUsername.val = value;
+ validityPerField['db_user'] = state.valid;
+ isValid.val = Object.values(validityPerField).every(v => v);
+ },
+ validators: [
+ required,
+ maxLength(100),
+ ],
+ }),
+ Input({
+ name: 'password',
+ label: 'Client Secret',
+ value: connectionPassword,
+ type: 'password',
+ passwordSuggestions: false,
+ placeholder: (originalConnection?.connection_id && originalConnection?.project_pw_encrypted) ? secretsPlaceholder : '',
+ onChange: (value, state) => {
+ connectionPassword.val = value;
+ validityPerField['password'] = state.valid;
+ isValid.val = Object.values(validityPerField).every(v => v);
+ },
+ validators: [
+ requiredIf(() => !originalConnection?.connection_id || !originalConnection?.project_pw_encrypted),
+ ],
+ }),
+ );
+ }
+
+ return Input({
+ name: 'password',
+ label: 'Access Token',
+ value: connectionPassword,
+ type: 'password',
+ passwordSuggestions: false,
+ placeholder: (originalConnection?.connection_id && originalConnection?.project_pw_encrypted) ? secretsPlaceholder : '',
+ onChange: (value, state) => {
+ connectionPassword.val = value;
+ validityPerField['password'] = state.valid;
+ isValid.val = Object.values(validityPerField).every(v => v);
+ },
+ validators: [
+ requiredIf(() => !originalConnection?.connection_id || !originalConnection?.project_pw_encrypted),
+ ],
+ });
+ },
),
);
};
diff --git a/testgen/ui/components/frontend/js/components/dialog.js b/testgen/ui/components/frontend/js/components/dialog.js
new file mode 100644
index 00000000..788a85eb
--- /dev/null
+++ b/testgen/ui/components/frontend/js/components/dialog.js
@@ -0,0 +1,134 @@
+/**
+ * @typedef DialogProps
+ * @type {object}
+ * @property {(string | import('../van.min.js').State)} title - Dialog title
+ * @property {import('../van.min.js').State} open - Reactive open state
+ * @property {Function} onClose - Called when the dialog is closed (backdrop click or X button)
+ * @property {string} [width] - CSS width value, default '30rem'
+ */
+import van from '../van.min.js';
+import { getValue, loadStylesheet } from '../utils.js';
+
+const { button, div, i, span } = van.tags;
+
+/**
+ * A dialog component that mimics Streamlit's dialog visual style.
+ * Opens as a fixed-position overlay covering the full viewport so it
+ * works from within any V2 component container, regardless of depth.
+ *
+ * Usage:
+ * const open = van.state(false);
+ *
+ * Dialog(
+ * { title: 'Confirm', open, onClose: () => open.val = false },
+ * div('Are you sure?'),
+ * Button({ label: 'Confirm', onclick: () => { doThing(); open.val = false; } }),
+ * )
+ *
+ * @param {DialogProps} props
+ * @param {...(Element | string)} children - Content rendered in the dialog body
+ */
+const Dialog = ({ title, open, onClose, width = '30rem' }, ...children) => {
+ loadStylesheet('dialog', stylesheet);
+
+ return div(
+ {
+ class: 'tg-dialog-overlay',
+ style: () => open.val ? '' : 'display: none',
+ onclick: () => onClose(),
+ },
+ div(
+ {
+ class: 'tg-dialog',
+ role: 'dialog',
+ 'aria-modal': 'true',
+ tabindex: '-1',
+ style: () => `width: ${getValue(width)}`,
+ onclick: (e) => e.stopPropagation(),
+ },
+ div(
+ { class: 'tg-dialog-header' },
+ span({ class: 'tg-dialog-title' }, title),
+ ),
+ div({ class: 'tg-dialog-content' }, ...children),
+ button(
+ {
+ class: 'tg-dialog-close',
+ 'aria-label': 'Close',
+ onclick: () => onClose(),
+ },
+ i({ class: 'material-symbols-rounded' }, 'close'),
+ ),
+ ),
+ );
+};
+
+const stylesheet = new CSSStyleSheet();
+stylesheet.replace(`
+.tg-dialog-overlay {
+ position: fixed;
+ inset: 0;
+ z-index: 1000;
+ background: rgba(49, 51, 63, 0.5);
+ display: flex;
+ align-items: center;
+ justify-content: center;
+}
+
+.tg-dialog {
+ position: relative;
+ background: var(--portal-background, white);
+ border-radius: 8px;
+ box-shadow: var(--portal-box-shadow, 0 4px 32px rgba(0, 0, 0, 0.25));
+ max-width: calc(100vw - 2rem);
+ max-height: 80vh;
+ display: flex;
+ flex-direction: column;
+ overflow: hidden;
+}
+
+.tg-dialog-header {
+ padding: 1.5rem 3.5rem 0.75rem 1.5rem;
+ font-size: 1.5rem;
+ font-weight: 600;
+ line-height: 1.5;
+ display: flex;
+ align-items: center;
+ flex-shrink: 0;
+}
+
+.tg-dialog-content {
+ padding: 0.75rem 1.5rem 1.5rem;
+ overflow-y: auto;
+ color: var(--primary-text-color);
+}
+
+.tg-dialog-close {
+ position: absolute;
+ top: 0.75rem;
+ right: 0.75rem;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ width: 2rem;
+ height: 2rem;
+ padding: 0;
+ border: none;
+ border-radius: 4px;
+ background: transparent;
+ cursor: pointer;
+ color: var(--secondary-text-color);
+ transition: background 200ms;
+}
+
+.tg-dialog-close:hover {
+ background: rgba(0, 0, 0, 0.08);
+}
+
+.tg-dialog-close .material-symbols-rounded {
+ font-size: 18px;
+ line-height: 18px;
+}
+`);
+
+export { Dialog };
diff --git a/testgen/ui/components/frontend/js/components/file_input.js b/testgen/ui/components/frontend/js/components/file_input.js
index 5b49f503..77738aa0 100644
--- a/testgen/ui/components/frontend/js/components/file_input.js
+++ b/testgen/ui/components/frontend/js/components/file_input.js
@@ -15,14 +15,16 @@
* @property {string} name
* @property {string} value
* @property {string?} class
+ * @property {string?} help
* @property {Array?} validators
* @property {function(FileValue?, InputState)?} onChange
- *
+ *
*/
import van from '../van.min.js';
import { checkIsRequired, getRandomId, getValue, loadStylesheet } from "../utils.js";
import { Icon } from './icon.js';
import { Button } from './button.js';
+import { withTooltip } from './tooltip.js';
import { humanReadableSize } from '../display_utils.js';
const { div, input, label, span } = van.tags;
@@ -112,12 +114,18 @@ const FileInput = (options) => {
return div(
{ class: cssClass },
- label(
+ div(
{ class: 'tg-file-uploader--label text-caption flex-row fx-gap-1' },
options.label,
() => isRequired.val
? span({ class: 'text-error' }, '*')
: '',
+ () => getValue(options.help)
+ ? withTooltip(
+ Icon({ size: 16, classes: 'text-disabled' }, 'help'),
+ { text: options.help, position: 'bottom', width: 200 }
+ )
+ : null,
),
div(
{ class: () => `tg-file-uploader--dropzone flex-column clickable ${fileOver.val ? 'on-dragover' : ''}` },
@@ -177,7 +185,9 @@ const FileSelectionDropZone = (placeholder, sizeLimit) => {
div(
{ class: 'flex-column fx-gap-1' },
span({}, placeholder),
- span({ class: 'text-secondary text-caption' }, `Limit ${humanReadableSize(sizeLimit)} per file`),
+ sizeLimit
+ ? span({ class: 'text-secondary text-caption' }, `Limit ${humanReadableSize(sizeLimit)} per file`)
+ : null,
),
);
};
diff --git a/testgen/ui/components/frontend/js/components/help_menu.js b/testgen/ui/components/frontend/js/components/help_menu.js
index 3ea341db..45b2da24 100644
--- a/testgen/ui/components/frontend/js/components/help_menu.js
+++ b/testgen/ui/components/frontend/js/components/help_menu.js
@@ -23,9 +23,9 @@ import { Icon } from './icon.js';
const { a, div, span } = van.tags;
-const baseHelpUrl = 'https://docs.datakitchen.io/articles/dataops-testgen-help/';
-const releaseNotesTopic = 'testgen-release-notes';
-const upgradeTopic = 'upgrade-testgen';
+const baseHelpUrl = 'https://docs.datakitchen.io/testgen/';
+const releaseNotesTopic = 'release-notes/';
+const upgradeTopic = 'administer/upgrade-testgen/';
const slackUrl = 'https://data-observability-slack.datakitchen.io/join';
const trainingUrl = 'https://info.datakitchen.io/data-quality-training-and-certifications';
diff --git a/testgen/ui/components/frontend/js/components/input.js b/testgen/ui/components/frontend/js/components/input.js
index 130aba5c..da3b93fc 100644
--- a/testgen/ui/components/frontend/js/components/input.js
+++ b/testgen/ui/components/frontend/js/components/input.js
@@ -132,7 +132,7 @@ const Input = (/** @type Properties */ props) => {
props.prefix,
)
: undefined,
- input({
+ () => input({
value,
name: props.name ?? '',
type: inputType,
@@ -173,7 +173,7 @@ const Input = (/** @type Properties */ props) => {
style: `top: ${((getValue(props.height) || defaultHeight) - addonIconSize) / 2}px`,
onclick: () => inputType.val = passwordFieldTypeSwitch[inputType.val],
},
- inputType.val === 'password' ? 'visibility' : 'visibility_off',
+ () => inputType.val === 'password' ? 'visibility' : 'visibility_off',
)
: '',
showClearable
diff --git a/testgen/ui/components/frontend/js/components/portal.js b/testgen/ui/components/frontend/js/components/portal.js
index 12fa2e70..fce86227 100644
--- a/testgen/ui/components/frontend/js/components/portal.js
+++ b/testgen/ui/components/frontend/js/components/portal.js
@@ -23,7 +23,7 @@ const Portal = (/** @type Options */ options, ...args) => {
const { target, targetRelative, align = 'left', position = 'bottom' } = getValue(options);
const id = `${target}-portal`;
- window.testgen.portals[id] = { domId: id, targetId: target, opened: options.opened };
+ window.testgen.portals[id] = { domId: id, targetId: target, opened: options.opened, close: () => { options.opened.val = false; } };
return () => {
if (!getValue(options.opened)) {
diff --git a/testgen/ui/components/frontend/js/components/radio_group.js b/testgen/ui/components/frontend/js/components/radio_group.js
index 4f8b0008..97aef2df 100644
--- a/testgen/ui/components/frontend/js/components/radio_group.js
+++ b/testgen/ui/components/frontend/js/components/radio_group.js
@@ -8,11 +8,13 @@
* @typedef Properties
* @type {object}
* @property {string} label
+ * @property {string?} help
* @property {Option[]} options
* @property {string | number | boolean | null} value
* @property {function(string | number | boolean | null)?} onChange
* @property {number?} width
* @property {('default' | 'inline' | 'vertical')?} layout
+ * @property {boolean?} disabled
*/
import van from '../van.min.js';
import { getRandomId, getValue, loadStylesheet } from '../utils.js';
@@ -26,12 +28,19 @@ const RadioGroup = (/** @type Properties */ props) => {
const groupName = getRandomId();
const layout = getValue(props.layout) ?? 'default';
+ const disabled = getValue(props.disabled) ?? false;
return div(
- { class: () => `tg-radio-group--wrapper ${layout}`, style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}` },
+ { class: () => `tg-radio-group--wrapper ${layout}${disabled ? ' disabled' : ''}`, style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}` },
div(
- { class: 'text-caption tg-radio-group--label' },
+ { class: 'text-caption tg-radio-group--label flex-row fx-gap-1' },
props.label,
+ () => getValue(props.help)
+ ? withTooltip(
+ Icon({ size: 16, classes: 'text-disabled' }, 'help'),
+ { text: props.help, position: 'top', width: 200 }
+ )
+ : null,
),
() => div(
{ class: 'tg-radio-group' },
@@ -42,6 +51,7 @@ const RadioGroup = (/** @type Properties */ props) => {
name: groupName,
value: option.value,
checked: () => option.value === getValue(props.value),
+ disabled,
onchange: van.derive(() => {
const onChange = props.onChange?.val ?? props.onChange;
return onChange ? () => onChange(option.value) : null;
@@ -149,6 +159,11 @@ stylesheet.replace(`
border-radius: 5px;
}
+.tg-radio-group--wrapper.disabled {
+ opacity: 0.5;
+ pointer-events: none;
+}
+
.tg-radio-group--help {
white-space: pre-wrap;
line-height: 16px;
diff --git a/testgen/ui/components/frontend/js/components/score_issues.js b/testgen/ui/components/frontend/js/components/score_issues.js
index 659f8020..bcab1146 100644
--- a/testgen/ui/components/frontend/js/components/score_issues.js
+++ b/testgen/ui/components/frontend/js/components/score_issues.js
@@ -159,7 +159,7 @@ const IssuesTable = (
category === 'column_name'
? span({ class: 'ml-2' })
: ColumnProfilingButton(row.column, row.table, row.table_group_id),
- columns.map((columnName) => TableCell(row, columnName)),
+ columns.map((columnName) => TableCell(row, columnName, score.project_code)),
)),
() => Paginator({
pageIndex,
@@ -192,7 +192,7 @@ const ColumnProfilingButton = (
style: 'color: var(--secondary-text-color);',
tooltip: 'View profiling for column',
tooltipPosition: 'top-right',
- onclick: () => emitEvent('ColumnProflingClicked', { payload: { column_name, table_name, table_group_id } }),
+ onclick: () => emitEvent('ColumnProfilingClicked', { payload: { column_name, table_name, table_group_id } }),
});
};
@@ -253,13 +253,13 @@ const Toolbar = (
* @param {string} column
* @returns {}
*/
-const TableCell = (row, column) => {
+const TableCell = (row, column, projectCode) => {
const componentByColumn = {
column: IssueColumnCell,
type: IssueCell,
status: StatusCell,
detail: DetailCell,
- time: TimeCell,
+ time: (value, row) => TimeCell(value, row, projectCode),
};
if (componentByColumn[column]) {
@@ -306,7 +306,7 @@ const DetailCell = (value, row) => {
);
};
-const TimeCell = (value, row) => {
+const TimeCell = (value, row, projectCode) => {
return div(
{ class: 'flex-column', style: `flex: 0 0 ${ISSUES_COLUMNS_SIZES.time}` },
row.issue_type === 'test'
@@ -321,6 +321,7 @@ const TimeCell = (value, row) => {
table_name: row.table,
column_name: row.column,
selected: row.id,
+ project_code: projectCode,
},
}),
);
diff --git a/testgen/ui/components/frontend/js/components/select.js b/testgen/ui/components/frontend/js/components/select.js
index 3e3e658c..b454009b 100644
--- a/testgen/ui/components/frontend/js/components/select.js
+++ b/testgen/ui/components/frontend/js/components/select.js
@@ -10,6 +10,7 @@
* @property {string?} id
* @property {string} label
* @property {string?|Array.?} value
+* @property {string?} placeholder
* @property {Array.} options
* @property {boolean} allowNull
* @property {Function|null} onChange
@@ -168,6 +169,7 @@ const Select = (/** @type {Properties} */ props) => {
? input({
id: `tg-select--field--${getRandomId()}`,
value: valueLabel.val,
+ placeholder: props.placeholder,
onkeyup: filterOptions,
})
: valueLabel.val,
diff --git a/testgen/ui/components/frontend/js/components/table_group_form.js b/testgen/ui/components/frontend/js/components/table_group_form.js
index 6b072255..8ba8b414 100644
--- a/testgen/ui/components/frontend/js/components/table_group_form.js
+++ b/testgen/ui/components/frontend/js/components/table_group_form.js
@@ -14,6 +14,7 @@
* @property {string?} profile_sk_column_mask
* @property {number?} profiling_delay_days
* @property {boolean?} profile_flag_cdes
+ * @property {boolean?} profile_flag_pii
* @property {boolean?} include_in_dashboard
* @property {boolean?} add_scorecard_definition
* @property {boolean?} profile_use_sampling
@@ -41,6 +42,7 @@
* @property {boolean?} showConnectionSelector
* @property {boolean?} disableConnectionSelector
* @property {boolean?} disableSchemaField
+ * @property {boolean?} disablePiiFlag
* @property {(tg: TableGroup, state: FormState) => void} onChange
*/
import van from '../van.min.js';
@@ -81,6 +83,8 @@ const TableGroupForm = (props) => {
const profileSkColumnMask = van.state(tableGroup.profile_sk_column_mask ?? '%_sk');
const profilingDelayDays = van.state(tableGroup.profiling_delay_days ?? 0);
const profileFlagCdes = van.state(tableGroup.profile_flag_cdes ?? true);
+ const profileFlagPii = van.state(tableGroup.profile_flag_pii ?? true);
+ const profileExcludeXde = van.state(tableGroup.profile_exclude_xde ?? true);
const includeInDashboard = van.state(tableGroup.include_in_dashboard ?? true);
const addScorecardDefinition = van.state(tableGroup.add_scorecard_definition ?? true);
const profileUseSampling = van.state(tableGroup.profile_use_sampling ?? false);
@@ -120,6 +124,8 @@ const TableGroupForm = (props) => {
profile_sk_column_mask: profileSkColumnMask.val,
profiling_delay_days: profilingDelayDays.val,
profile_flag_cdes: profileFlagCdes.val,
+ profile_flag_pii: profileFlagPii.val,
+ profile_exclude_xde: profileExcludeXde.val,
include_in_dashboard: includeInDashboard.val,
add_scorecard_definition: addScorecardDefinition.val,
profile_use_sampling: profileUseSampling.val,
@@ -183,9 +189,11 @@ const TableGroupForm = (props) => {
profileSkColumnMask,
),
SettingsForm(
- { editMode: !!tableGroup.id, setValidity: setFieldValidity },
+ { editMode: !!tableGroup.id, disablePiiFlag: getValue(props.disablePiiFlag) ?? false, setValidity: setFieldValidity },
profilingDelayDays,
profileFlagCdes,
+ profileFlagPii,
+ profileExcludeXde,
includeInDashboard,
addScorecardDefinition,
),
@@ -325,6 +333,8 @@ const SettingsForm = (
options,
profilingDelayDays,
profileFlagCdes,
+ profileFlagPii,
+ profileExcludeXde,
includeInDashboard,
addScorecardDefinition,
) => {
@@ -339,6 +349,19 @@ const SettingsForm = (
checked: profileFlagCdes,
onChange: (value) => profileFlagCdes.val = value,
}),
+ Checkbox({
+ name: 'profile_flag_pii',
+ label: 'Detect PII during profiling',
+ checked: profileFlagPii,
+ onChange: (value) => profileFlagPii.val = value,
+ disabled: options.disablePiiFlag,
+ }),
+ Checkbox({
+ name: 'profile_exclude_xde',
+ label: 'Exclude XDE columns from profiling',
+ checked: profileExcludeXde,
+ onChange: (value) => profileExcludeXde.val = value,
+ }),
Checkbox({
name: 'include_in_dashboard',
label: 'Include table group in Project Dashboard',
diff --git a/testgen/ui/components/frontend/js/components/table_group_test.js b/testgen/ui/components/frontend/js/components/table_group_test.js
index ff987f06..94aa4898 100644
--- a/testgen/ui/components/frontend/js/components/table_group_test.js
+++ b/testgen/ui/components/frontend/js/components/table_group_test.js
@@ -111,7 +111,7 @@ const TableGroupTest = (preview, options) => {
),
)
: div(
- { class: 'flex-row fx-justify-center', style: 'height: 50px; font-size: 16px;'},
+ { class: 'flex-row fx-justify-center p-3', style: 'min-height: 50px; font-size: 14px;'},
tableGroupPreview.message ?? 'No tables found.'
),
),
diff --git a/testgen/ui/components/frontend/js/components/test_definition_form.js b/testgen/ui/components/frontend/js/components/test_definition_form.js
index 31812f87..80962eee 100644
--- a/testgen/ui/components/frontend/js/components/test_definition_form.js
+++ b/testgen/ui/components/frontend/js/components/test_definition_form.js
@@ -51,6 +51,7 @@
* @property {string} default_parm_columns
* @property {string} default_parm_prompts
* @property {string} default_parm_help
+ * @property {string?} default_parm_required
* @property {string} default_severity
* @property {'column'|'referential'|'table'|'tablegroup'|'custom'} test_scope
* @property {string?} prediction
@@ -69,7 +70,7 @@ import { Select } from './select.js';
import { Textarea } from './textarea.js';
import { RadioGroup } from './radio_group.js';
import { Caption } from './caption.js';
-import { numberBetween } from '../form_validators.js';
+import { numberBetween, required } from '../form_validators.js';
const { div, span } = van.tags;
@@ -97,6 +98,7 @@ const TestDefinitionForm = (/** @type Properties */ props) => {
const paramColumns = (definition.default_parm_columns || '').split(',').map(v => v.trim());
const paramLabels = (definition.default_parm_prompts || '').split(',').map(v => v.trim());
const paramHelp = (definition.default_parm_help || '').split('|').map(v => v.trim());
+ const paramRequired = (definition.default_parm_required || '').split(',').map(v => v.trim().toUpperCase() === 'Y');
const hasThresholds = paramColumns.includes('history_calculation');
const dynamicParamColumns = paramColumns
@@ -105,6 +107,7 @@ const TestDefinitionForm = (/** @type Properties */ props) => {
column,
label: paramLabels[index] || column.replaceAll('_', ' '),
help: paramHelp[index] || null,
+ validators: paramRequired[index] ? [required] : undefined,
}))
.filter(config => !hasThresholds || !thresholdColumns.includes(config.column))
@@ -171,6 +174,7 @@ const TestDefinitionForm = (/** @type Properties */ props) => {
type: 'number',
value: currentValue(),
step: config.step,
+ validators: config.validators,
onChange: (value, state) => {
setFieldValues({ [column]: value || null })
setFieldValidity(column, state.valid);
@@ -188,8 +192,10 @@ const TestDefinitionForm = (/** @type Properties */ props) => {
help: config.help,
value: currentValue(),
height: 100,
- onChange: (value) => {
- setFieldValues({ [column]: value || null })
+ validators: config.validators,
+ onChange: (value, state) => {
+ setFieldValues({ [column]: value || null });
+ setFieldValidity(column, state.valid);
},
}),
);
@@ -202,6 +208,7 @@ const TestDefinitionForm = (/** @type Properties */ props) => {
label: config.label,
help: config.help,
value: currentValue(),
+ validators: config.validators,
onChange: (value, state) => {
setFieldValues({ [column]: value || null })
setFieldValidity(column, state.valid);
@@ -252,8 +259,8 @@ const historyCalcOptions = [
* @property {(updatedValues: object) => void} setFieldValues
* @property {(field: string, valid: boolean) => void} setFieldValidity
*
- * @param {ThresholdFormOptions} options
- * @param {TestDefinition} definition
+ * @param {ThresholdFormOptions} options
+ * @param {TestDefinition} definition
*/
const ThresholdForm = (options, definition) => {
const { setFieldValues, setFieldValidity } = options;
@@ -291,6 +298,21 @@ const ThresholdForm = (options, definition) => {
'lower_tolerance': newMode === 'static' ? lowerTolerance.val : newMode === 'prediction' ? definition.lower_tolerance : null,
'upper_tolerance': newMode === 'static' ? upperTolerance.val : newMode === 'prediction' ? definition.upper_tolerance : null,
});
+ if (newMode === 'static') {
+ if (!isFreshnessTrend) {
+ setFieldValidity('lower_tolerance', !!lowerTolerance.val);
+ }
+ setFieldValidity('upper_tolerance', !!upperTolerance.val);
+ setFieldValidity('history_lookback', true);
+ } else if (newMode === 'historical') {
+ setFieldValidity('lower_tolerance', true);
+ setFieldValidity('upper_tolerance', true);
+ setFieldValidity('history_lookback', !!historyLookback.val);
+ } else {
+ setFieldValidity('lower_tolerance', true);
+ setFieldValidity('upper_tolerance', true);
+ setFieldValidity('history_lookback', true);
+ }
},
}),
() => {
@@ -376,8 +398,8 @@ const ThresholdForm = (options, definition) => {
if (mode.val === 'static') {
return div(
- { class: 'flex-row fx-gap-3 fx-flex-wrap mt-2' },
- !isFreshnessTrend
+ { class: 'flex-row fx-gap-3 fx-flex-wrap fx-align-flex-start mt-2' },
+ !isFreshnessTrend
? div(
{ class: 'td-form--field' },
Input({
@@ -385,6 +407,7 @@ const ThresholdForm = (options, definition) => {
label: 'Lower Bound',
type: 'number',
value: lowerTolerance,
+ validators: [required],
onChange: (value, state) => {
lowerTolerance.val = value;
setFieldValues({ lower_tolerance: value });
@@ -400,6 +423,7 @@ const ThresholdForm = (options, definition) => {
label: isFreshnessTrend ? 'Maximum interval since last update (minutes)' : 'Upper Bound',
type: 'number',
value: upperTolerance,
+ validators: [required],
onChange: (value, state) => {
upperTolerance.val = value;
setFieldValues({ upper_tolerance: value });
diff --git a/testgen/ui/components/frontend/js/components/textarea.js b/testgen/ui/components/frontend/js/components/textarea.js
index 828d8c86..bdfc411a 100644
--- a/testgen/ui/components/frontend/js/components/textarea.js
+++ b/testgen/ui/components/frontend/js/components/textarea.js
@@ -1,4 +1,11 @@
/**
+ * @import { Validator } from '../form_validators.js';
+ *
+ * @typedef InputState
+ * @type {object}
+ * @property {boolean} valid
+ * @property {string[]} errors
+ *
* @typedef Properties
* @type {object}
* @property {string?} id
@@ -16,13 +23,14 @@
* @property {number?} width
* @property {number?} height
* @property {string?} testId
+ * @property {Array?} validators
*/
import van from '../van.min.js';
-import { debounce, getValue, loadStylesheet, getRandomId } from '../utils.js';
+import { debounce, getValue, loadStylesheet, getRandomId, checkIsRequired } from '../utils.js';
import { Icon } from './icon.js';
import { withTooltip } from './tooltip.js';
-const { div, label, textarea } = van.tags;
+const { div, label, textarea, small, span } = van.tags;
const defaultHeight = 64;
const Textarea = (/** @type Properties */ props) => {
@@ -30,18 +38,31 @@ const Textarea = (/** @type Properties */ props) => {
const domId = van.derive(() => getValue(props.id) ?? getRandomId());
const value = van.derive(() => getValue(props.value) ?? '');
+ const errors = van.derive(() => {
+ const validators = getValue(props.validators) ?? [];
+ return validators.map(v => v(value.val)).filter(error => error);
+ });
+ const firstError = van.derive(() => {
+ return errors.val[0] ?? '';
+ });
+ const isRequired = van.state(false);
+ const isDirty = van.state(false);
const onChange = props.onChange?.val ?? props.onChange;
if (onChange) {
- onChange(value.val);
+ onChange(value.val, { errors: errors.val, valid: errors.val.length <= 0 });
}
van.derive(() => {
const onChange = props.onChange?.val ?? props.onChange;
- if (onChange && value.val !== value.oldVal) {
- onChange(value.val);
+ if (onChange && (value.val !== value.oldVal || errors.val.length !== errors.oldVal.length)) {
+ onChange(value.val, { errors: errors.val, valid: errors.val.length <= 0 });
}
});
+ van.derive(() => {
+ isRequired.val = checkIsRequired(getValue(props.validators) ?? []);
+ });
+
return label(
{
id: domId,
@@ -52,6 +73,9 @@ const Textarea = (/** @type Properties */ props) => {
div(
{ class: 'flex-row fx-gap-1 text-caption' },
props.label,
+ () => isRequired.val
+ ? span({ class: 'text-error' }, '*')
+ : '',
() => getValue(props.help)
? withTooltip(
Icon({ size: 16, classes: 'text-disabled' }, 'help'),
@@ -66,8 +90,15 @@ const Textarea = (/** @type Properties */ props) => {
name: props.name ?? '',
disabled: props.disabled,
placeholder: () => getValue(props.placeholder) ?? '',
- oninput: debounce((/** @type Event */ event) => value.val = event.target.value, 300),
+ oninput: debounce((/** @type Event */ event) => {
+ isDirty.val = true;
+ value.val = event.target.value;
+ }, 300),
}),
+ () =>
+ isDirty.val && firstError.val
+ ? small({ class: 'tg-textarea--error' }, firstError)
+ : '',
);
};
@@ -96,6 +127,11 @@ stylesheet.replace(`
outline: none;
border-color: var(--primary-color);
}
+
+.tg-textarea--error {
+ height: 12px;
+ color: var(--error-color);
+}
`);
export { Textarea };
diff --git a/testgen/ui/components/frontend/js/components/toggle.js b/testgen/ui/components/frontend/js/components/toggle.js
index 0a635c7c..8d3fdbd4 100644
--- a/testgen/ui/components/frontend/js/components/toggle.js
+++ b/testgen/ui/components/frontend/js/components/toggle.js
@@ -4,6 +4,7 @@
* @property {string} label
* @property {string?} name
* @property {boolean?} checked
+ * @property {boolean?} disabled
* @property {string?} style
* @property {function(boolean)?} onChange
*/
@@ -15,14 +16,17 @@ const { input, label } = van.tags;
const Toggle = (/** @type Properties */ props) => {
loadStylesheet('toggle', stylesheet);
+ const disabled = props.disabled?.val ?? props.disabled ?? false;
+
return label(
- { class: 'flex-row fx-gap-2 clickable', style: props.style ?? '', 'data-testid': props.name ?? '' },
+ { class: `flex-row fx-gap-2 ${disabled ? '' : 'clickable'}`, style: props.style ?? '', 'data-testid': props.name ?? '' },
input({
type: 'checkbox',
role: 'switch',
class: 'tg-toggle--input clickable',
name: props.name ?? '',
checked: props.checked,
+ disabled,
onchange: van.derive(() => {
const onChange = props.onChange?.val ?? props.onChange;
return onChange ? (/** @type Event */ event) => onChange(event.target.checked) : null;
@@ -84,6 +88,11 @@ stylesheet.replace(`
.tg-toggle--input:checked::after {
left: 14px;
}
+
+.tg-toggle--input:disabled {
+ opacity: 0.5;
+ cursor: not-allowed;
+}
`);
export { Toggle };
diff --git a/testgen/ui/components/frontend/js/components/tree.js b/testgen/ui/components/frontend/js/components/tree.js
index 82acc371..59001db5 100644
--- a/testgen/ui/components/frontend/js/components/tree.js
+++ b/testgen/ui/components/frontend/js/components/tree.js
@@ -6,8 +6,9 @@
* @property {string?} classes
* @property {string?} icon
* @property {number?} iconSize
- * @property {'red'?} iconColor
+ * @property {string?} iconClass
* @property {string?} iconTooltip
+ * @property {Element|function?} prefix
* @property {TreeNode[]?} children
* @property {number?} level
* @property {boolean?} expanded
@@ -91,7 +92,7 @@ const Tree = (/** @type Properties */ props, /** @type any? */ searchOptionsCont
},
Toolbar(treeNodes, multiSelect, props, searchOptionsContent, filtersContent),
div(
- { class: 'tg-tree' },
+ { class: () => `tg-tree ${multiSelect.val ? 'multi-select' : ''}` },
() => div(
{
class: 'tg-tree--nodes',
@@ -312,9 +313,10 @@ const TreeNode = (
span({ class: 'mr-1' }),
]
: null,
+ !multiSelect && node.prefix ? node.prefix : null,
() => {
if (node.icon) {
- const icon = Icon({ size: node.iconSize, classes: `tg-tree--row-icon ${node.iconColor}` }, node.icon);
+ const icon = Icon({ size: node.iconSize, classes: `tg-tree--row-icon ${node.iconClass}` }, node.icon);
return node.iconTooltip ? withTooltip(icon, { text: node.iconTooltip, position: 'right' }) : icon;
}
return null;
@@ -519,10 +521,6 @@ stylesheet.replace(`
color: #B0BEC5;
text-align: center;
}
-
-.tg-tree--row-icon.red {
- color: var(--red);
-}
`);
export { Tree };
diff --git a/testgen/ui/components/frontend/js/components/wizard_progress_indicator.js b/testgen/ui/components/frontend/js/components/wizard_progress_indicator.js
index 88bbb789..80e35703 100644
--- a/testgen/ui/components/frontend/js/components/wizard_progress_indicator.js
+++ b/testgen/ui/components/frontend/js/components/wizard_progress_indicator.js
@@ -14,14 +14,15 @@
*
* @param {WizardStepMeta[]} steps
* @param {CurrentStep} currentStep
- * @returns
+ * @param {function(string)?} onStepClick
+ * @returns
*/
import van from '../van.min.js';
import { colorMap } from '../display_utils.js';
const { div, i, span } = van.tags;
-const WizardProgressIndicator = (steps, currentStep) => {
+const WizardProgressIndicator = (steps, currentStep, onStepClick) => {
const currentPhysicalIndex = steps.findIndex(s => s.includedSteps.includes(currentStep.name));
const progressWidth = van.state('0px');
@@ -50,8 +51,12 @@ const WizardProgressIndicator = (steps, currentStep) => {
z-index: -4;
`;
- const currentStepIndicator = (title, stepIndex) => div(
- { class: `flex-column fx-align-flex-center fx-gap-1 step-icon-current`, style: 'position: relative;' },
+ const currentStepIndicator = (title, stepIndex, step) => div(
+ {
+ class: `flex-column fx-align-flex-center fx-gap-1 step-icon-current`,
+ style: `position: relative; ${onStepClick ? 'cursor: pointer;' : ''}`,
+ onclick: () => onStepClick?.(step.includedSteps[0]),
+ },
stepIndex === 0
? div({ style: 'position: absolute; width: 50%; height: 50%; left: 0px; background: var(--dk-dialog-background); z-index: -1;' }, '')
: '',
@@ -66,7 +71,10 @@ const WizardProgressIndicator = (steps, currentStep) => {
);
const pendingStepIndicator = (title, stepIndex) => div(
- { class: `flex-column fx-align-flex-center fx-gap-1 ${currentPhysicalIndex === stepIndex ? 'step-icon-current' : 'text-secondary'}`, style: 'position: relative;' },
+ {
+ class: `flex-column fx-align-flex-center fx-gap-1 ${currentPhysicalIndex === stepIndex ? 'step-icon-current' : 'text-secondary'}`,
+ style: 'position: relative; cursor: default;',
+ },
stepIndex === 0
? div({ style: 'position: absolute; width: 50%; height: 50%; left: 0px; background: var(--dk-dialog-background); z-index: -1;' }, '')
: '',
@@ -80,8 +88,12 @@ const WizardProgressIndicator = (steps, currentStep) => {
span({}, title),
);
- const completedStepIndicator = (title, stepIndex) => div(
- { class: `flex-column fx-align-flex-center fx-gap-1 ${currentPhysicalIndex === stepIndex ? 'step-icon-current' : 'text-secondary'}`, style: 'position: relative;' },
+ const completedStepIndicator = (title, stepIndex, step) => div(
+ {
+ class: `flex-column fx-align-flex-center fx-gap-1 ${currentPhysicalIndex === stepIndex ? 'step-icon-current' : 'text-secondary'}`,
+ style: `position: relative; ${onStepClick ? 'cursor: pointer;' : ''}`,
+ onclick: () => onStepClick?.(step.includedSteps[0]),
+ },
stepIndex === 0
? div({ style: 'position: absolute; width: 50%; height: 50%; left: 0px; background: var(--dk-dialog-background); z-index: -1;' }, '')
: '',
@@ -134,9 +146,9 @@ const WizardProgressIndicator = (steps, currentStep) => {
...steps.map((step, physicalIdx) => {
if (step.index < currentStep.index) {
if (step.skipped) return skippedStepIndicator(step.title, physicalIdx);
- return completedStepIndicator(step.title, physicalIdx);
+ return completedStepIndicator(step.title, physicalIdx, step);
} else if (step.includedSteps.includes(currentStep.name)) {
- return currentStepIndicator(step.title, physicalIdx);
+ return currentStepIndicator(step.title, physicalIdx, step);
} else {
return pendingStepIndicator(step.title, physicalIdx);
}
diff --git a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
index 85689099..49c63832 100644
--- a/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
+++ b/testgen/ui/components/frontend/js/data_profiling/column_distribution.js
@@ -17,7 +17,7 @@ import { PercentBar } from '../components/percent_bar.js';
import { FrequencyBars } from '../components/frequency_bars.js';
import { BoxPlot } from '../components/box_plot.js';
import { loadStylesheet, emitEvent, friendlyPercent, getValue } from '../utils.js';
-import { formatNumber, formatTimestamp } from '../display_utils.js';
+import { formatNumber, formatTimestamp, PII_REDACTED } from '../display_utils.js';
const { div, span } = van.tags;
const columnTypeFunctionMap = {
@@ -150,15 +150,17 @@ function AlphaColumn(/** @type Column */ item) {
),
item.top_freq_values || item.top_patterns ? div(
{ class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-5 tg-profile--plot-block' },
- item.top_freq_values ? FrequencyBars({
- title: 'Frequent Values',
- total: item.record_ct,
- nullCount: item.null_value_ct,
- items: item.top_freq_values.substring(2).split('\n| ').map(parts => {
- const [value, count] = parts.split(' | ');
- return { value, count: Number(count) };
- }),
- }) : null,
+ item.top_freq_values === PII_REDACTED
+ ? Attribute({ label: 'Frequent Values', value: PII_REDACTED, width: attributeWidth })
+ : item.top_freq_values ? FrequencyBars({
+ title: 'Frequent Values',
+ total: item.record_ct,
+ nullCount: item.null_value_ct,
+ items: item.top_freq_values.substring(2).split('\n| ').map(parts => {
+ const [value, count] = parts.split(' | ');
+ return { value, count: Number(count) };
+ }),
+ }) : null,
item.top_patterns ? FrequencyBars({
title: 'Frequent Patterns',
total: item.record_ct,
@@ -292,19 +294,19 @@ function NumericColumn(/** @type Column */ item) {
Attribute({ label: 'Median Value', value: formatNumber(item.percentile_50), width: attributeWidth }),
Attribute({ label: '75th Percentile', value: formatNumber(item.percentile_75), width: attributeWidth }),
),
- div(
- { class: 'flex-row fx-justify-center tg-profile--plot-block' },
- BoxPlot({
- minimum: item.min_value,
- maximum: item.max_value,
- median: item.percentile_50,
- lowerQuartile: item.percentile_25,
- upperQuartile: item.percentile_75,
- average: item.avg_value,
- standardDeviation: item.stdev_value,
- width: boxPlotWidth,
- }),
- ),
+ item.min_value === PII_REDACTED || item.max_value === PII_REDACTED ? null : div(
+ { class: 'flex-row fx-justify-center tg-profile--plot-block' },
+ BoxPlot({
+ minimum: item.min_value,
+ maximum: item.max_value,
+ median: item.percentile_50,
+ lowerQuartile: item.percentile_25,
+ upperQuartile: item.percentile_75,
+ average: item.avg_value,
+ standardDeviation: item.stdev_value,
+ width: boxPlotWidth,
+ }),
+ ),
);
}
diff --git a/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js b/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js
index 98f4a6e1..f08dbf7f 100644
--- a/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js
+++ b/testgen/ui/components/frontend/js/data_profiling/column_profiling_results.js
@@ -12,7 +12,7 @@ import { getValue, resizeFrameHeightToElement, resizeFrameHeightOnDOMChange, loa
import { ColumnDistributionCard } from './column_distribution.js';
import { DataCharacteristicsCard } from './data_characteristics.js';
import { LatestProfilingTime } from './data_profiling_utils.js';
-import { HygieneIssuesCard, PotentialPIICard } from './data_issues.js';
+import { HygieneIssuesCard } from './data_issues.js';
const { div, h2, span } = van.tags;
@@ -51,10 +51,7 @@ const ColumnProfilingResults = (/** @type Properties */ props) => {
),
DataCharacteristicsCard({ border: true }, column.val),
ColumnDistributionCard({ border: true, dataPreview: !!props.data_preview?.val }, column.val),
- column.val.hygiene_issues ? [
- PotentialPIICard({ border: true }, column.val),
- HygieneIssuesCard({ border: true }, column.val),
- ] : null,
+ column.val.hygiene_issues ? HygieneIssuesCard({ border: true }, column.val) : null,
),
);
}
diff --git a/testgen/ui/components/frontend/js/data_profiling/data_issues.js b/testgen/ui/components/frontend/js/data_profiling/data_issues.js
index 261a2283..1bd38e7a 100644
--- a/testgen/ui/components/frontend/js/data_profiling/data_issues.js
+++ b/testgen/ui/components/frontend/js/data_profiling/data_issues.js
@@ -22,11 +22,6 @@ import { formatTimestamp } from '../display_utils.js';
const { div, span, i } = van.tags;
-const RISK_COLORS = {
- High: 'red',
- Moderate: 'orange',
-};
-
const LIKELIHOOD_COLORS = {
Definite: 'red',
Likely: 'orange',
@@ -40,40 +35,6 @@ const STATUS_COLORS = {
Log: 'blue',
};
-const PotentialPIICard = (/** @type Properties */ props, /** @type Table | Column */ item) => {
- const title = `Potential PII ${item.is_latest_profile ? '*' : ''}`;
- const attributes = [
- {
- key: 'detail', width: 150, label: 'Type',
- value_function: (issue) => (issue.detail || '').split('Type: ')[1],
- },
- {
- key: 'pii_risk', width: 100, label: 'Risk', classes: 'text-secondary',
- value_function: (issue) => div(
- { class: 'flex-row' },
- span({ class: 'dot mr-2', style: `color: var(--${RISK_COLORS[issue.pii_risk]});` }),
- issue.pii_risk,
- ),
- },
- ];
- if (item.type === 'table') {
- attributes.unshift(
- { key: 'column_name', width: 150, label: 'Column' },
- );
- }
-
- const potentialPII = item.hygiene_issues.filter(({ issue_likelihood }) => issue_likelihood === 'Potential PII');
- const linkProps = props.noLinks ? null : {
- href: 'profiling-runs:hygiene',
- params: { run_id: item.profile_run_id, issue_class: 'Potential PII' },
- };
- const noneContent = item.profile_run_id && !item.profiling_error
- ? 'No potential PII detected'
- : span({ class: 'text-secondary' }, `No profiling results for ${item.type}`);
-
- return IssuesCard(props, title, potentialPII, attributes, linkProps, noneContent);
-};
-
const HygieneIssuesCard = (/** @type Properties */ props, /** @type Table | Column */ item) => {
const title = `Hygiene Issues ${item.is_latest_profile ? '*' : ''}`;
const attributes = [
@@ -101,6 +62,7 @@ const HygieneIssuesCard = (/** @type Properties */ props, /** @type Table | Colu
run_id: item.profile_run_id,
table_name: item.table_name,
column_name: item.column_name,
+ project_code: item.project_code,
},
};
const noneContent = item.profile_run_id && !item.profiling_error
@@ -141,6 +103,7 @@ const TestIssuesCard = (/** @type Properties */ props, /** @type Table | Column
table_name: item.table_name,
column_name: item.column_name,
selected: issue.id,
+ project_code: item.project_code,
},
open_new: true,
label: formatTimestamp(issue.test_run_date),
@@ -248,4 +211,4 @@ const IssuesCard = (
});
}
-export { PotentialPIICard, HygieneIssuesCard, TestIssuesCard };
+export { HygieneIssuesCard, TestIssuesCard };
diff --git a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js
index 6c4c9586..71f2ac5e 100644
--- a/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js
+++ b/testgen/ui/components/frontend/js/data_profiling/data_profiling_utils.js
@@ -45,6 +45,8 @@
* * Column Tags
* @property {string?} description
* @property {boolean?} critical_data_element
+ * @property {boolean?} excluded_data_element
+ * @property {boolean?} pii_flag
* @property {string?} data_source
* @property {string?} source_system
* @property {string?} source_process
@@ -227,6 +229,7 @@ const LatestProfilingTime = (/** @type Properties */ props, /** @type Table | Co
run_id: item.profile_run_id,
table_name: item.table_name,
column_name: item.column_name,
+ project_code: item.project_code,
},
open_new: true,
label: formatTimestamp(item.profile_run_date),
diff --git a/testgen/ui/components/frontend/js/data_profiling/metadata_tags.js b/testgen/ui/components/frontend/js/data_profiling/metadata_tags.js
new file mode 100644
index 00000000..88554474
--- /dev/null
+++ b/testgen/ui/components/frontend/js/data_profiling/metadata_tags.js
@@ -0,0 +1,490 @@
+/**
+ * @import { Column, Table } from './data_profiling_utils.js';
+ *
+ * @typedef TagProperties
+ * @type {object}
+ * @property {Object.<string, string[]>} tagOptions
+ * @property {boolean} editable
+ * @property {boolean} piiEditable
+ * @property {AutoflagSettings} autoflagSettings
+ * @property {(() => void)?} onCancel
+ */
+import van from '../van.min.js';
+import { EditableCard } from '../components/editable_card.js';
+import { Attribute } from '../components/attribute.js';
+import { Input } from '../components/input.js';
+import { Icon } from '../components/icon.js';
+import { withTooltip } from '../components/tooltip.js';
+import { emitEvent, loadStylesheet } from '../utils.js';
+import { RadioGroup } from '../components/radio_group.js';
+import { Checkbox } from '../components/checkbox.js';
+import { capitalize } from '../display_utils.js';
+import { Card } from '../components/card.js';
+import { Dialog } from '../components/dialog.js';
+import { Button } from '../components/button.js';
+import { Alert } from '../components/alert.js';
+
+const { div, span } = van.tags;
+
+const attributeWidth = 250;
+const descriptionWidth = 932;
+const multiEditWidth = 400;
+
+const booleanOptions = [
+ { label: 'Yes', value: true },
+ { label: 'No', value: false },
+];
+
+const piiOptions = [
+ { label: 'Yes', value: 'MANUAL' },
+ { label: 'No', value: null },
+];
+
+const pii_risk_map = {
+ 'A': 'High',
+ 'B': 'Moderate',
+ 'C': 'Low',
+};
+const pii_type_map = {
+ 'ID': 'ID',
+ 'NAME': 'Name',
+ 'DEMO': 'Demographic',
+ 'CONTACT': 'Contact',
+};
+
+const TAG_KEYS = [
+ 'data_source',
+ 'source_system',
+ 'source_process',
+ 'business_domain',
+ 'stakeholder_group',
+ 'transform_level',
+ 'aggregation_level',
+ 'data_product',
+];
+const TAG_HELP = {
+ data_source: 'Original source of the dataset',
+ source_system: 'Enterprise system source for the dataset',
+ source_process: 'Process, program, or data flow that produced the dataset',
+ business_domain: 'Business division responsible for the dataset, e.g., Finance, Sales, Manufacturing',
+ stakeholder_group: 'Data owners or stakeholders responsible for the dataset',
+ transform_level: 'Data warehouse processing stage, e.g., Raw, Conformed, Processed, Reporting, or Medallion level (bronze, silver, gold)',
+ aggregation_level: 'Data granularity of the dataset, e.g. atomic, historical, snapshot, aggregated, time-rollup, rolling, summary',
+ data_product: 'Data domain that comprises the dataset',
+};
+
+/**
+ * @param {TagProperties} props
+ * @param {Table | Column} item
+ * @returns
+ */
+const MetadataTagsCard = (props, item) => {
+ loadStylesheet('metadata-tags', stylesheet);
+
+ const title = `${item.type} Tags `;
+ const attributes = [
+ 'critical_data_element',
+ ...(item.type === 'column' ? ['pii_flag', 'excluded_data_element'] : []),
+ 'description',
+ ...TAG_KEYS,
+ ].map(key => {
+ let value = item[key];
+ if (['excluded_data_element', 'pii_flag'].includes(key) || (item.type === 'table' && key === 'critical_data_element')) {
+ value = value ?? false;
+ }
+ return {
+ key,
+ help: TAG_HELP[key],
+ label: key === 'pii_flag' ? 'PII Data' : capitalize(key.replaceAll('_', ' ')),
+ state: van.state(value),
+ inheritTableGroup: item[`table_group_${key}`] ?? null, // Table group values inherited by table or column
+ inheritTable: item[`table_${key}`] ?? null, // Table values inherited by column
+ };
+ });
+
+ const content = div(
+ { class: 'flex-row fx-flex-wrap fx-gap-4' },
+ attributes.map(({ key, label, help, state, inheritTable, inheritTableGroup }) => {
+ let value = state.rawVal ?? inheritTable ?? inheritTableGroup;
+
+ if (key === 'critical_data_element') {
+ return CdeDisplay(value, item.type === 'column', state.rawVal === null);
+ }
+ if (key === 'excluded_data_element') {
+ return XdeDisplay(value);
+ }
+ if (key === 'pii_flag') {
+ return PiiDisplay(value);
+ }
+
+ const inheritedFrom = state.rawVal !== null ? null
+ : inheritTable !== null ? 'table'
+ : inheritTableGroup !== null ? 'table group'
+ : null;
+
+ if (inheritedFrom && value) {
+ value = span(
+ { class: 'flex-row fx-gap-1' },
+ InheritedIcon(inheritedFrom),
+ value,
+ );
+ }
+ return Attribute({ label, help, value, width: key === 'description' ? descriptionWidth : attributeWidth });
+ }),
+ );
+
+ if (!props.editable) {
+ return Card({ title, content });
+ }
+
+ // Define as function so the block is re-rendered with reset values when re-editing after a cancel
+ const editingContent = () => div(
+ { class: 'flex-row fx-flex-wrap fx-gap-4' },
+ attributes.map(({ key, label, help, state, inheritTable, inheritTableGroup }) => {
+ if (key === 'critical_data_element') {
+ return RadioGroup({
+ label,
+ help: 'Mark columns that are important for business decisions or regulatory compliance. CDEs are highlighted in the catalog and can be tracked separately in quality scores.',
+ options: item.type === 'column' ? [...booleanOptions, { label: 'Inherit', value: null }] : booleanOptions,
+ width: attributeWidth,
+ value: state.rawVal,
+ onChange: (value) => state.val = value,
+ });
+ }
+ if (key === 'excluded_data_element') {
+ return RadioGroup({
+ label,
+ help: 'Exclude this column from profiling and test generation. The column remains in the catalog but will not be analyzed.',
+ options: booleanOptions,
+ width: attributeWidth,
+ value: state.rawVal,
+ onChange: (value) => state.val = value,
+ });
+ }
+ if (key === 'pii_flag') {
+ return RadioGroup({
+ label,
+ help: 'Mark columns containing personally identifiable information. PII values are redacted for users without viewing permissions.',
+ options: piiOptions,
+ width: attributeWidth,
+ value: state.rawVal ? 'MANUAL' : null,
+ onChange: (value) => state.val = value,
+ disabled: !props.piiEditable,
+ });
+ }
+ return Input({
+ label, help,
+ width: key === 'description' ? descriptionWidth : attributeWidth,
+ height: 32,
+ value: state.rawVal,
+ placeholder: (inheritTable || inheritTableGroup) ? `Inherited: ${inheritTable ?? inheritTableGroup}` : null,
+ autocompleteOptions: props.tagOptions?.[key],
+ onChange: (value) => state.val = value || null,
+ });
+ }),
+ );
+
+ const warningDialogOpen = van.state(false);
+ const pendingSaveAction = van.state(null);
+ const warnCde = van.state(false);
+ const warnPii = van.state(false);
+
+ return div(
+ EditableCard({
+ title: `${item.type} Tags `,
+ content, editingContent,
+ onSave: () => {
+ const items = [{ type: item.type, id: item.id }];
+ const tags = attributes.reduce((object, { key, state }) => {
+ object[key] = state.rawVal;
+ return object;
+ }, {});
+
+ warnCde.val = props.autoflagSettings.profile_flag_cdes && tags.critical_data_element !== item.critical_data_element;
+ warnPii.val = props.autoflagSettings.profile_flag_pii && tags.pii_flag !== item.pii_flag;
+
+ if (warnCde.val || warnPii.val) {
+ const disableFlags = [];
+ if (warnCde.val) {
+ disableFlags.push('profile_flag_cdes');
+ }
+ if (warnPii.val) {
+ disableFlags.push('profile_flag_pii');
+ }
+ pendingSaveAction.val = () => emitEvent('TagsChanged', { payload: { items, tags, disable_flags: disableFlags } });
+ warningDialogOpen.val = true;
+ } else {
+ emitEvent('TagsChanged', { payload: { items, tags } })
+ }
+ },
+ // Reset states to original values on cancel
+ onCancel: () => attributes.forEach(({ key, state }) => state.val = item[key]),
+ hasChanges: () => attributes.some(({ key, state }) => state.val !== item[key]),
+ }),
+ WarningDialog(warningDialogOpen, pendingSaveAction, warnCde, warnPii),
+ );
+};
+
+const InheritedIcon = (/** @type string */ inheritedFrom) => withTooltip(
+ Icon({ size: 18, classes: 'text-disabled' }, 'layers'),
+ { text: `Inherited from ${inheritedFrom} tags`, position: 'top-right'},
+);
+
+/**
+ * @param {boolean|null} value
+ * @param {boolean} isColumn
+ * @param {boolean} isInherited
+ * @returns
+ */
+const CdeDisplay = (value, isColumn, isInherited) => {
+ if (value) {
+ return div(
+ { style: `width: ${attributeWidth}px` },
+ span(
+ { class: 'flex-row fx-gap-1 metadata-badge cde' },
+ Icon({ size: 24, classes: 'text-purple' }, 'star'),
+ span(isColumn ? 'Critical data element' : 'All critical data elements'),
+ (isColumn && isInherited) ? InheritedIcon('table') : null,
+ ),
+ );
+ }
+ return span(
+ { class: 'flex-row fx-gap-1', style: `width: ${attributeWidth}px` },
+ Icon({ size: 20, classes: 'text-disabled' }, 'cancel'),
+ span({ class: 'text-secondary' }, isColumn ? 'Not a critical data element' : 'Not all critical data elements'),
+ (isColumn && isInherited) ? InheritedIcon('table') : null,
+ );
+}
+
+const XdeDisplay = (/** @type boolean */ value) => {
+ if (value) {
+ return div(
+ { style: `width: ${attributeWidth}px` },
+ span(
+ { class: 'flex-row fx-gap-1 metadata-badge xde' },
+ Icon({ size: 20, classes: 'text-brown' }, 'visibility_off'),
+ span('Excluded data element'),
+ ),
+ );
+ }
+ return span(
+ { class: 'flex-row fx-gap-1', style: `width: ${attributeWidth}px` },
+ Icon({ size: 20, classes: 'text-disabled' }, 'visibility'),
+ span({ class: 'text-secondary' }, 'Not an excluded data element'),
+ );
+}
+
+const PiiDisplay = (/** @type string|null */ value) => {
+ if (value) {
+ let caption = null;
+ if (value !== 'MANUAL') {
+ const [ risk, type, detail ] = value.split('/'); // e.g., A/ID/Passport, B/DEMO/Financial
+ const typeLabel = pii_type_map[type];
+ caption = `${pii_risk_map[risk] ?? 'Moderate'} Risk${typeLabel ? ' - ' + typeLabel : ''}${detail && detail !== typeLabel ? ' / ' + detail : ''}`;
+ }
+ return div(
+ { style: `width: ${attributeWidth}px` },
+ span(
+ { class: 'flex-row fx-gap-1 metadata-badge pii' },
+ Icon({ size: 21, classes: 'text-orange' }, 'shield_person'),
+ span('PII data'),
+ caption ? withTooltip(Icon({ size: 16 }, 'help'), { text: caption }) : null,
+ ),
+ );
+ }
+ return span(
+ { class: 'flex-row fx-gap-1', style: `width: ${attributeWidth}px` },
+ Icon({ classes: 'text-disabled' }, 'remove_moderator'),
+ span({ class: 'text-secondary' }, 'Not PII data'),
+ );
+};
+
+/**
+ * @param {TagProperties} props
+ * @param {Object} selectedItems
+ * @returns
+ */
+const MetadataTagsMultiEdit = (props, selectedItems) => {
+ const columnCount = van.derive(() => selectedItems.val?.reduce((count, { children }) => count + children.length, 0));
+
+ const attributes = [
+ 'critical_data_element',
+ 'pii_flag',
+ 'excluded_data_element',
+ ...TAG_KEYS,
+ ].map(key => ({
+ key,
+ help: TAG_HELP[key],
+ label: key === 'pii_flag' ? 'PII' : capitalize(key.replaceAll('_', ' ')),
+ checkedState: van.state(null),
+ valueState: van.state(null),
+ }));
+
+ const warningDialogOpen = van.state(false);
+ const pendingSaveAction = van.state(null);
+ const warnCde = van.state(false);
+ const warnPii = van.state(false);
+
+ return div(
+ Card({
+ title: 'Edit Tags for Selection',
+ actionContent: span(
+ { class: 'text-secondary mr-4' },
+ span({ style: 'font-weight: 500' }, columnCount),
+ () => ` column${columnCount.val > 1 ? 's' : ''} selected`
+ ),
+ content: div(
+ { class: 'flex-column' },
+ attributes.map(({ key, label, help, checkedState, valueState }) => div(
+ { class: 'flex-row fx-gap-3' },
+ Checkbox({
+ checked: checkedState,
+ onChange: (checked) => checkedState.val = checked,
+ }),
+ div(
+ {
+ class: 'pb-4 flex-row',
+ style: `min-width: ${multiEditWidth}px`,
+ onclick: () => checkedState.val = true,
+ },
+ ['critical_data_element', 'excluded_data_element', 'pii_flag'].includes(key)
+ ? RadioGroup({
+ label,
+ width: multiEditWidth,
+ options: key === 'pii_flag' ? piiOptions : booleanOptions,
+ onChange: (value) => valueState.val = value,
+ disabled: key === 'pii_flag' && !props.piiEditable,
+ })
+ : Input({
+ label, help,
+ width: multiEditWidth,
+ height: 32,
+ placeholder: () => checkedState.val ? null : '(keep current values)',
+ autocompleteOptions: props.tagOptions?.[key],
+ onChange: (value) => valueState.val = value || null,
+ }),
+ ),
+ )),
+ div(
+ { class: 'flex-row fx-justify-content-flex-end fx-gap-3 mt-4' },
+ Button({
+ type: 'stroked',
+ label: 'Cancel',
+ width: 'auto',
+ onclick: props.onCancel,
+ }),
+ Button({
+ type: 'stroked',
+ color: 'primary',
+ label: 'Save',
+ width: 'auto',
+ disabled: () => attributes.every(({ checkedState }) => !checkedState.val),
+ onclick: () => {
+ const items = selectedItems.val.reduce((array, table) => {
+ const [ type, id ] = table.id.split('_');
+ array.push({ type, id });
+
+ table.children.forEach(column => {
+ const [ type, id ] = column.id.split('_');
+ array.push({ type, id });
+ });
+
+ return array;
+ }, []);
+
+ const tags = attributes.reduce((object, { key, checkedState, valueState }) => {
+ if (checkedState.val) {
+ object[key] = valueState.rawVal;
+ }
+ return object;
+ }, {});
+
+ warnCde.val = props.autoflagSettings.profile_flag_cdes && tags.critical_data_element !== undefined;
+ warnPii.val = props.autoflagSettings.profile_flag_pii && tags.pii_flag !== undefined;
+
+ if (warnCde.val || warnPii.val) {
+ const disableFlags = [];
+ if (warnCde.val) {
+ disableFlags.push('profile_flag_cdes');
+ }
+ if (warnPii.val) {
+ disableFlags.push('profile_flag_pii');
+ }
+ pendingSaveAction.val = () => emitEvent('TagsChanged', { payload: { items, tags, disable_flags: disableFlags } });
+ warningDialogOpen.val = true;
+ } else {
+ emitEvent('TagsChanged', { payload: { items, tags } });
+ // Don't set multiEditMode to false here
+ // Otherwise this event gets superseded by the ItemSelected event
+ // Let the Streamlit rerun handle the state reset with 'last_saved_timestamp'
+ }
+ },
+ }),
+ ),
+ ),
+ }),
+ WarningDialog(warningDialogOpen, pendingSaveAction, warnCde, warnPii),
+ );
+};
+
+const WarningDialog = (open, pendingAction, warnCde, warnPii) => {
+ return Dialog(
+ { open, width: '40rem', onClose: () => open.val = false },
+ div(
+ { class: 'flex-column fx-gap-4' },
+ span(() => `This table group is currently configured to detect ${warnCde.val ? 'CDEs' : ''}${warnCde.val && warnPii.val ? ' and ' : ''}${warnPii.val ? 'PII' : ''} during profiling.`),
+ Alert(
+ { type: 'warn', icon: 'warning' },
+ 'To preserve your manual edits, autodetection will be turned off.',
+ ),
+ div(
+ { class: 'flex-row fx-justify-content-flex-end fx-gap-3 mt-4' },
+ Button({
+ type: 'stroked',
+ label: 'Cancel',
+ width: 'auto',
+ onclick: () => open.val = false,
+ }),
+ Button({
+ type: 'stroked',
+ color: 'primary',
+ label: 'OK',
+ width: 'auto',
+ onclick: () => {
+ open.val = false;
+ pendingAction.val?.();
+ },
+ }),
+ ),
+ ),
+ );
+};
+
+const stylesheet = new CSSStyleSheet();
+stylesheet.replace(`
+.metadata-badge {
+ display: inline-flex;
+ padding: 4px 12px 4px 6px;
+ border-radius: 15px;
+ height: 30px;
+ box-sizing: border-box;
+}
+
+.metadata-badge.cde {
+ background-color: rgba(171, 71, 188, 0.15);
+}
+
+.metadata-badge.cde i {
+ margin-top: -3px;
+}
+
+.metadata-badge.pii {
+ background-color: rgba(255, 152, 0, 0.15);
+}
+
+.metadata-badge.xde {
+ background-color: rgba(141, 110, 99, 0.15);
+}
+`);
+
+export { MetadataTagsCard, MetadataTagsMultiEdit, TAG_KEYS };
diff --git a/testgen/ui/components/frontend/js/display_utils.js b/testgen/ui/components/frontend/js/display_utils.js
index c590c9a0..8dc0c9f5 100644
--- a/testgen/ui/components/frontend/js/display_utils.js
+++ b/testgen/ui/components/frontend/js/display_utils.js
@@ -2,6 +2,9 @@ function formatTimestamp(
/** @type number | string */ timestamp,
/** @type boolean */ showYear,
) {
+ if (timestamp === PII_REDACTED) {
+ return timestamp;
+ }
if (timestamp) {
let date = timestamp;
if (typeof timestamp === 'number') {
@@ -81,6 +84,9 @@ function humanReadableDuration(/** @type string */ duration, /** @type boolean *
}
function formatNumber(/** @type number | string */ number, /** @type number */ decimals = 3) {
+ if (number === PII_REDACTED) {
+ return number;
+ }
if (!['number', 'string'].includes(typeof number) || isNaN(number)) {
return '--';
}
@@ -173,6 +179,7 @@ const colorMap = {
}
const DISABLED_ACTION_TEXT = 'You do not have permissions to perform this action. Contact your administrator.';
+const PII_REDACTED = '[PII Redacted]';
export {
formatTimestamp,
@@ -187,4 +194,5 @@ export {
viewPortUnitsToPixels,
colorMap,
DISABLED_ACTION_TEXT,
+ PII_REDACTED,
};
diff --git a/testgen/ui/components/frontend/js/form_validators.js b/testgen/ui/components/frontend/js/form_validators.js
index 635b8b6a..a0a85d5b 100644
--- a/testgen/ui/components/frontend/js/form_validators.js
+++ b/testgen/ui/components/frontend/js/form_validators.js
@@ -120,11 +120,31 @@ function sizeLimit(limit) {
return validator;
}
+/**
+ * @typedef NotInOptions
+ * @type {object}
+ * @property {function(any): any} formatter
+ * @property {string} errorMessage
+ *
+ * @param {any[]} values
+ * @param {NotInOptions?} options
+ * @returns {Validator}
+ */
+function notIn(values, options) {
+ return (value) => {
+ if (value && values.includes(!!options?.formatter ? options.formatter(value) : value)) {
+ return options?.errorMessage ?? `Value cannot be any of: ${values.join(', ')}.`;
+ }
+ return null;
+ };
+}
+
export {
maxLength,
minLength,
numberBetween,
noSpaces,
+ notIn,
required,
requiredIf,
sizeLimit,
diff --git a/testgen/ui/components/frontend/js/main.js b/testgen/ui/components/frontend/js/main.js
index 8819548e..24df64fa 100644
--- a/testgen/ui/components/frontend/js/main.js
+++ b/testgen/ui/components/frontend/js/main.js
@@ -45,8 +45,10 @@ const componentLoaders = {
monitors_dashboard: () => import('./pages/monitors_dashboard.js').then(m => m.MonitorsDashboard),
table_monitoring_trends: () => import('./pages/table_monitoring_trends.js').then(m => m.TableMonitoringTrend),
test_results_chart: () => import('./pages/test_results_chart.js').then(m => m.TestResultsChart),
+ test_definition_notes: () => import('./pages/test_definition_notes.js').then(m => m.TestDefinitionNotes),
schema_changes_list: () => import('./components/schema_changes_list.js').then(m => m.SchemaChangesList),
edit_monitor_settings: () => import('./pages/edit_monitor_settings.js').then(m => m.EditMonitorSettings),
+ import_metadata_dialog: () => import('./pages/import_metadata_dialog.js').then(m => m.ImportMetadataDialog),
};
const TestGenComponent = async (/** @type {string} */ id, /** @type {object} */ props) => {
diff --git a/testgen/ui/components/frontend/js/pages/data_catalog.js b/testgen/ui/components/frontend/js/pages/data_catalog.js
index 1e2f4dfb..33418b93 100644
--- a/testgen/ui/components/frontend/js/pages/data_catalog.js
+++ b/testgen/ui/components/frontend/js/pages/data_catalog.js
@@ -19,6 +19,8 @@
* @property {string} table_drop_date
* @property {boolean} critical_data_element
* @property {boolean} table_critical_data_element
+ * @property {boolean} excluded_data_element
+ * @property {boolean} pii_flag
* @property {string} data_source
* @property {string} source_system
* @property {string} source_process
@@ -40,6 +42,12 @@
* @type {object}
* @property {boolean} can_edit
* @property {boolean} can_navigate
+ * @property {boolean} can_view_pii
+ *
+ * @typedef AutoflagSettings
+ * @type {object}
+ * @property {boolean} profile_flag_cdes
+ * @property {boolean} profile_flag_pii
*
* @typedef Properties
* @type {object}
@@ -50,24 +58,21 @@
* @property {Object.} tag_values
* @property {string} last_saved_timestamp
* @property {Permissions} permissions
+ * @property {AutoflagSettings} autoflag_settings
*/
import van from '../van.min.js';
import { Tree } from '../components/tree.js';
-import { EditableCard } from '../components/editable_card.js';
-import { Attribute } from '../components/attribute.js';
-import { Input } from '../components/input.js';
import { Icon } from '../components/icon.js';
import { withTooltip } from '../components/tooltip.js';
import { Streamlit } from '../streamlit.js';
import { emitEvent, getRandomId, getValue, loadStylesheet } from '../utils.js';
import { ColumnDistributionCard } from '../data_profiling/column_distribution.js';
import { DataCharacteristicsCard } from '../data_profiling/data_characteristics.js';
-import { PotentialPIICard, HygieneIssuesCard, TestIssuesCard } from '../data_profiling/data_issues.js';
+import { HygieneIssuesCard, TestIssuesCard } from '../data_profiling/data_issues.js';
import { getColumnIcon, TABLE_ICON, LatestProfilingTime } from '../data_profiling/data_profiling_utils.js';
-import { RadioGroup } from '../components/radio_group.js';
import { Checkbox } from '../components/checkbox.js';
import { Select } from '../components/select.js';
-import { capitalize, caseInsensitiveIncludes } from '../display_utils.js';
+import { capitalize, caseInsensitiveIncludes, DISABLED_ACTION_TEXT } from '../display_utils.js';
import { TableSizeCard } from '../data_profiling/table_size.js';
import { Card } from '../components/card.js';
import { Button } from '../components/button.js';
@@ -75,6 +80,7 @@ import { Link } from '../components/link.js';
import { EMPTY_STATE_MESSAGE, EmptyState } from '../components/empty_state.js';
import { Portal } from '../components/portal.js';
import { TableCreateScriptCard } from '../data_profiling/table_create_script.js';
+import { MetadataTagsCard, MetadataTagsMultiEdit, TAG_KEYS } from '../data_profiling/metadata_tags.js';
const { div, h2, span } = van.tags;
@@ -82,27 +88,6 @@ const { div, h2, span } = van.tags;
const EMPTY_IMAGE = new Image(1, 1);
EMPTY_IMAGE.src = 'data:image/gif;base64,R0lGODlhAQABAIAAAP///wAAACH5BAEAAAAALAAAAAABAAEAAAICRAEAOw==';
-const TAG_KEYS = [
- 'data_source',
- 'source_system',
- 'source_process',
- 'business_domain',
- 'stakeholder_group',
- 'transform_level',
- 'aggregation_level',
- 'data_product',
-];
-const TAG_HELP = {
- data_source: 'Original source of the dataset',
- source_system: 'Enterprise system source for the dataset',
- source_process: 'Process, program, or data flow that produced the dataset',
- business_domain: 'Business division responsible for the dataset, e.g., Finance, Sales, Manufacturing',
- stakeholder_group: 'Data owners or stakeholders responsible for the dataset',
- transform_level: 'Data warehouse processing stage, e.g., Raw, Conformed, Processed, Reporting, or Medallion level (bronze, silver, gold)',
- aggregation_level: 'Data granularity of the dataset, e.g. atomic, historical, snapshot, aggregated, time-rollup, rolling, summary',
- data_product: 'Data domain that comprises the dataset',
-};
-
const DataCatalog = (/** @type Properties */ props) => {
loadStylesheet('data-catalog', stylesheet);
@@ -126,7 +111,7 @@ const DataCatalog = (/** @type Properties */ props) => {
label: table_name,
classes: table_drop_date ? 'text-disabled' : (table_add_date && (Date.now() - new Date(table_add_date * 1000).getTime()) < 7 * 86400000) ? 'text-bold' : '',
...TABLE_ICON,
- iconColor: record_ct === 0 ? 'red' : null,
+ iconClass: record_ct === 0 ? 'text-error' : null,
iconTooltip: record_ct === 0 ? 'No records detected' : null,
criticalDataElement: !!item.table_critical_data_element,
children: [],
@@ -136,11 +121,26 @@ const DataCatalog = (/** @type Properties */ props) => {
const columnNode = {
id: column_id,
label: column_name,
- classes: drop_date ? 'text-disabled' : (add_date && (Date.now() - new Date(add_date * 1000).getTime()) < 7 * 86400000) ? 'text-bold' : '',
+ classes: `column ${drop_date ? 'text-disabled' : (add_date && (Date.now() - new Date(add_date * 1000).getTime()) < 7 * 86400000) ? 'text-bold' : ''}`,
...getColumnIcon(item),
- iconColor: value_ct === 0 ? 'red' : null,
+ iconClass: value_ct === 0 ? 'text-error' : null,
iconTooltip: value_ct === 0 ? 'No non-null values detected' : null,
+ prefix: () => {
+ const icons = [];
+ if (item.critical_data_element ?? item.table_critical_data_element) {
+ icons.push(withTooltip(Icon({ size: 15, classes: 'text-purple' }, 'star'), { text: 'Critical data element', position: 'right' }));
+ }
+ if (item.excluded_data_element) {
+ icons.push(withTooltip(Icon({ size: 15, classes: 'text-brown' }, 'visibility_off'), { text: 'Excluded data element', position: 'right' }));
+ }
+ if (item.pii_flag) {
+ icons.push(withTooltip(Icon({ size: 15, classes: 'text-orange' }, 'shield_person'), { text: 'PII data', position: 'right' }));
+ }
+ return span({ class: 'tg-dh--column-prefix' }, ...icons);
+ },
criticalDataElement: !!(item.critical_data_element ?? item.table_critical_data_element),
+ excludedDataElement: !!item.excluded_data_element,
+ piiFlag: !!item.pii_flag,
};
TAG_KEYS.forEach(key => columnNode[key] = item[key] ?? item[`table_${key}`]);
tables[table_id].children.push(columnNode);
@@ -177,7 +177,7 @@ const DataCatalog = (/** @type Properties */ props) => {
tableName: van.state(true),
columnName: van.state(true),
};
- const filters = { criticalDataElement: van.state(false) };
+ const filters = { criticalDataElement: van.state(false), piiFlag: van.state(false), showExcluded: van.state(false) };
TAG_KEYS.forEach(key => filters[key] = van.state(null));
// To hold temporary state within the portals, which might be discarded by clicking outside
@@ -193,6 +193,7 @@ const DataCatalog = (/** @type Properties */ props) => {
const userCanEdit = getValue(props.permissions)?.can_edit ?? false;
const userCanNavigate = getValue(props.permissions)?.can_navigate ?? false;
+ const userCanViewPii = getValue(props.permissions)?.can_view_pii ?? false;
const projectSummary = getValue(props.project_summary);
return projectSummary.table_group_count > 0
@@ -208,7 +209,22 @@ const DataCatalog = (/** @type Properties */ props) => {
testId: 'table-group-filter',
onChange: (value) => emitEvent('TableGroupSelected', {payload: value}),
}),
- ExportOptions(treeNodes, multiSelectedItems),
+ div(
+ { class: 'flex-row fx-gap-2' },
+ userCanEdit
+ ? Button({
+ icon: 'upload',
+ type: 'stroked',
+ label: 'Import',
+ tooltip: 'Import metadata from CSV',
+ tooltipPosition: 'left',
+ width: 'fit-content',
+ style: 'background: var(--button-generic-background-color);',
+ onclick: () => emitEvent('ImportClicked', {}),
+ })
+ : null,
+ ExportOptions(treeNodes, multiSelectedItems, userCanEdit),
+ ),
),
() => treeNodes.val.length
? div(
@@ -233,6 +249,8 @@ const DataCatalog = (/** @type Properties */ props) => {
|| (!!node.children && !searchOptions.tableName.val)
|| (!node.children && !searchOptions.columnName.val))
|| ![ node.criticalDataElement, false ].includes(filters.criticalDataElement.val)
+ || ![ node.piiFlag, false ].includes(filters.piiFlag.val)
+ || (node.excludedDataElement && !filters.showExcluded.val)
|| TAG_KEYS.some(key => ![ node[key], null ].includes(filters[key].val)),
onApplySearchOptions: () => {
copyState(tempSearchOptions, searchOptions);
@@ -243,10 +261,12 @@ const DataCatalog = (/** @type Properties */ props) => {
searchOptions.columnName.val = true;
}
},
- hasActiveFilters: () => filters.criticalDataElement.val || TAG_KEYS.some(key => !!filters[key].val),
+ hasActiveFilters: () => filters.criticalDataElement.val || filters.piiFlag.val || filters.showExcluded.val || TAG_KEYS.some(key => !!filters[key].val),
onApplyFilters: () => copyState(tempFilters, filters),
onResetFilters: () => {
tempFilters.criticalDataElement.val = false;
+ tempFilters.piiFlag.val = false;
+ tempFilters.showExcluded.val = false;
TAG_KEYS.forEach(key => tempFilters[key].val = null);
},
},
@@ -273,11 +293,24 @@ const DataCatalog = (/** @type Properties */ props) => {
() => {
copyState(filters, tempFilters);
return div(
- Checkbox({
- label: 'Only critical data elements (CDEs)',
- checked: tempFilters.criticalDataElement,
- onChange: (checked) => tempFilters.criticalDataElement.val = checked,
- }),
+ div(
+ { class: 'flex-column fx-gap-3' },
+ Checkbox({
+ label: span({ class: 'flex-row fx-gap-1' }, 'Only critical data elements (CDEs)', Icon({ size: 18, classes: 'text-purple' }, 'star')),
+ checked: tempFilters.criticalDataElement,
+ onChange: (checked) => tempFilters.criticalDataElement.val = checked,
+ }),
+ Checkbox({
+ label: span({ class: 'flex-row fx-gap-1' }, 'Only PII data', Icon({ size: 18, classes: 'text-orange' }, 'shield_person')),
+ checked: tempFilters.piiFlag,
+ onChange: (checked) => tempFilters.piiFlag.val = checked,
+ }),
+ Checkbox({
+ label: span({ class: 'flex-row fx-gap-1' }, 'Show excluded data elements (XDEs)', Icon({ size: 18, classes: 'text-brown' }, 'visibility_off')),
+ checked: tempFilters.showExcluded,
+ onChange: (checked) => tempFilters.showExcluded.val = checked,
+ }),
+ ),
div(
{
class: 'flex-row fx-flex-wrap fx-gap-4 fx-justify-space-between mt-4',
@@ -314,7 +347,23 @@ const DataCatalog = (/** @type Properties */ props) => {
},
),
() => multiEditMode.val
- ? MultiEdit(props, multiSelectedItems, multiEditMode)
+ ? div(
+ { class: 'tg-dh--details flex-column' },
+ () => multiSelectedItems.val?.length
+ ? MetadataTagsMultiEdit(
+ {
+ tagOptions: getValue(props.tag_values),
+ piiEditable: userCanViewPii,
+ autoflagSettings: getValue(props.autoflag_settings) ?? {},
+ onCancel: () => multiEditMode.val = false,
+ },
+ multiSelectedItems,
+ )
+ : ItemEmptyState(
+ 'Select tables or columns on the left to edit their tags.',
+ 'edit_document',
+ )
+ )
: SelectedDetails(props, selectedItem.val),
)
: ConditionalEmptyState(projectSummary, userCanEdit, userCanNavigate),
@@ -322,7 +371,7 @@ const DataCatalog = (/** @type Properties */ props) => {
: ConditionalEmptyState(projectSummary, userCanEdit, userCanNavigate);
};
-const ExportOptions = (/** @type TreeNode[] */ treeNodes, /** @type SelectedNode[] */ selectedNodes) => {
+const ExportOptions = (/** @type TreeNode[] */ treeNodes, /** @type SelectedNode[] */ selectedNodes, /** @type boolean */ userCanEdit) => {
const exportOptionsDomId = `data-catalog-export-${getRandomId()}`;
const exportOptionsOpened = van.state(false);
@@ -332,7 +381,7 @@ const ExportOptions = (/** @type TreeNode[] */ treeNodes, /** @type SelectedNode
icon: 'download',
type: 'stroked',
label: 'Export',
- tooltip: 'Download columns to Excel',
+ tooltip: 'Download columns to Excel or CSV',
tooltipPosition: 'left',
width: 'fit-content',
style: 'background: var(--button-generic-background-color);',
@@ -399,6 +448,17 @@ const ExportOptions = (/** @type TreeNode[] */ treeNodes, /** @type SelectedNode
'Selected columns',
)
: null,
+ div(
+ {
+ class: 'tg-dh--export-option',
+ style: 'border-top: var(--button-stroked-border);',
+ onclick: () => {
+ emitEvent('ExportCsvClicked', {});
+ exportOptionsOpened.val = false;
+ },
+ },
+ 'Metadata CSV',
+ ),
),
),
];
@@ -407,6 +467,7 @@ const ExportOptions = (/** @type TreeNode[] */ treeNodes, /** @type SelectedNode
const SelectedDetails = (/** @type Properties */ props, /** @type Table | Column */ item) => {
const userCanEdit = getValue(props.permissions)?.can_edit ?? false;
const userCanNavigate = getValue(props.permissions)?.can_navigate ?? false;
+ const userCanViewPii = getValue(props.permissions)?.can_view_pii ?? false;
return item
? div(
@@ -429,11 +490,18 @@ const SelectedDetails = (/** @type Properties */ props, /** @type Table | Column
item.type === 'column'
? ColumnDistributionCard({ dataPreview: true, history: true }, item)
: TableSizeCard({}, item),
- TagsCard({ tagOptions: getValue(props.tag_values), editable: userCanEdit }, item),
- PotentialPIICard({ noLinks: !userCanNavigate }, item),
+ MetadataTagsCard(
+ {
+ tagOptions: getValue(props.tag_values),
+ editable: userCanEdit,
+ piiEditable: userCanViewPii,
+ autoflagSettings: getValue(props.autoflag_settings) ?? {},
+ },
+ item,
+ ),
HygieneIssuesCard({ noLinks: !userCanNavigate }, item),
TestIssuesCard({ noLinks: !userCanNavigate }, item),
- TestSuitesCard(item),
+ TestSuitesCard({ noLinks: !userCanNavigate }, item),
item.type === 'table'
? TableCreateScriptCard({}, item)
: null,
@@ -444,275 +512,52 @@ const SelectedDetails = (/** @type Properties */ props, /** @type Table | Column
);
};
-/**
-* @typedef TagProperties
-* @type {object}
-* @property {Object.} tagOptions
-* @property {boolean} editable
-*/
-const TagsCard = (/** @type TagProperties */ props, /** @type Table | Column */ item) => {
- const title = `${item.type} Tags `;
- const attributes = [
- 'description',
- 'critical_data_element',
- ...TAG_KEYS,
- ].map(key => ({
- key,
- help: TAG_HELP[key],
- label: capitalize(key.replaceAll('_', ' ')),
- state: van.state(item[key]),
- inheritTableGroup: item[`table_group_${key}`] ?? null, // Table group values inherited by table or column
- inheritTable: item[`table_${key}`] ?? null, // Table values inherited by column
- }));
-
- const InheritedIcon = (/** @type string */ inheritedFrom) => withTooltip(
- Icon({ size: 18, classes: 'text-disabled' }, 'layers'),
- { text: `Inherited from ${inheritedFrom} tags`, position: 'top-right'},
- );
- const width = 300;
- const descriptionWidth = 932;
-
- const content = div(
- { class: 'flex-row fx-flex-wrap fx-gap-4' },
- attributes.map(({ key, label, help, state, inheritTable, inheritTableGroup }) => {
- let value = state.rawVal ?? inheritTable ?? inheritTableGroup;
-
- if (key === 'critical_data_element') {
- return span(
- { class: 'flex-row fx-gap-1', style: `width: ${width}px` },
- Icon(
- { classes: value ? 'text-green' : 'text-disabled' },
- value ? 'check_circle' : 'cancel',
- ),
- span(
- { class: value ? '' : 'text-secondary' },
- item.type === 'column'
- ? (value ? 'Critical data element' : 'Not a critical data element')
- : (value ? 'All critical data elements' : 'Not all critical data elements'),
- ),
- (item.type === 'column' && state.rawVal === null) ? InheritedIcon('table') : null,
- );
- }
-
- const inheritedFrom = state.rawVal !== null ? null
- : inheritTable !== null ? 'table'
- : inheritTableGroup !== null ? 'table group'
- : null;
-
- if (inheritedFrom && value) {
- value = span(
- { class: 'flex-row fx-gap-1' },
- InheritedIcon(inheritedFrom),
- value,
- );
- }
- return Attribute({ label, help, value, width: key === 'description' ? descriptionWidth : width });
- }),
- );
-
- if (!props.editable) {
- return Card({ title, content });
- }
-
- // Define as function so the block is re-rendered with reset values when re-editing after a cancel
- const editingContent = () => div(
- { class: 'flex-row fx-flex-wrap fx-gap-4' },
- attributes.map(({ key, label, help, state, inheritTable, inheritTableGroup }) => {
- if (key === 'critical_data_element') {
- const options = [
- { label: 'Yes', value: true },
- { label: 'No', value: false },
- { label: 'Inherit', value: null },
- ];
- return RadioGroup({
- label, width, options,
- value: state.rawVal,
- onChange: (value) => state.val = value,
- });
- };
-
- return Input({
- label, help,
- width: key === 'description' ? descriptionWidth : width,
- height: 32,
- value: state.rawVal,
- placeholder: (inheritTable || inheritTableGroup) ? `Inherited: ${inheritTable ?? inheritTableGroup}` : null,
- autocompleteOptions: props.tagOptions?.[key],
- onChange: (value) => state.val = value || null,
- });
- }),
- );
-
- return EditableCard({
- title: `${item.type} Tags `,
- content, editingContent,
- onSave: () => {
- const items = [{ type: item.type, id: item.id }];
- const tags = attributes.reduce((object, { key, state }) => {
- object[key] = state.rawVal;
- return object;
- }, {});
- emitEvent('TagsChanged', { payload: { items, tags } });
- },
- // Reset states to original values on cancel
- onCancel: () => attributes.forEach(({ key, state }) => state.val = item[key]),
- hasChanges: () => attributes.some(({ key, state }) => state.val !== item[key]),
- });
-};
-
-const TestSuitesCard = (/** @type Table | Column */ item) => {
+const TestSuitesCard = (/** @type Properties */ props, /** @type Table | Column */ item) => {
return Card({
title: 'Related Test Suites',
content: div(
{ class: 'flex-column fx-gap-2' },
item.test_suites.map(({ id, name, test_count }) => div(
{ class: 'flex-row fx-gap-1' },
- Link({
- href: 'test-suites:definitions',
- params: {
- test_suite_id: id,
- table_name: item.table_name,
- column_name: item.column_name,
- },
- open_new: true,
- label: name,
- }),
+ props.noLinks
+ ? span(name)
+ : Link({
+ href: 'test-suites:definitions',
+ params: {
+ test_suite_id: id,
+ table_name: item.table_name,
+ column_name: item.column_name,
+ project_code: item.project_code,
+ },
+ open_new: true,
+ label: name,
+ }),
span({ class: 'text-caption' }, `(${test_count} test definitions)`),
))
),
actionContent: item.test_suites.length
- ? null
+ ? null
: item.drop_date
? span({ class: 'text-secondary' }, `No test definitions for ${item.type}`)
: span(
{ class: 'text-secondary flex-row fx-gap-1 fx-justify-content-flex-end' },
`No test definitions yet for ${item.type}.`,
- Link({
- href: 'test-suites',
- params: {
- project_code: item.project_code,
- table_group_id: item.table_group_id,
- },
- open_new: true,
- label: 'Go to Test Suites',
- right_icon: 'chevron_right',
- }),
+ props.noLinks
+ ? null
+ : Link({
+ href: 'test-suites',
+ params: {
+ project_code: item.project_code,
+ table_group_id: item.table_group_id,
+ },
+ open_new: true,
+ label: 'Go to Test Suites',
+ right_icon: 'chevron_right',
+ }),
),
});
};
-const MultiEdit = (/** @type Properties */ props, /** @type Object */ selectedItems, /** @type Object */ multiEditMode) => {
- const hasSelection = van.derive(() => selectedItems.val?.length);
- const columnCount = van.derive(() => selectedItems.val?.reduce((count, { children }) => count + children.length, 0));
-
- const attributes = [
- 'critical_data_element',
- ...TAG_KEYS,
- ].map(key => ({
- key,
- help: TAG_HELP[key],
- label: capitalize(key.replaceAll('_', ' ')),
- checkedState: van.state(null),
- valueState: van.state(null),
- }));
-
- const cdeOptions = [
- { label: 'Yes', value: true },
- { label: 'No', value: false },
- { label: 'Inherit', value: null },
- ];
- const tagOptions = getValue(props.tag_values) ?? {};
- const width = 400;
-
- return div(
- { class: 'tg-dh--details flex-column' },
- () => hasSelection.val
- ? Card({
- title: 'Edit Tags for Selection',
- actionContent: span(
- { class: 'text-secondary mr-4' },
- span({ style: 'font-weight: 500' }, columnCount),
- () => ` column${columnCount.val > 1 ? 's' : ''} selected`
- ),
- content: div(
- { class: 'flex-column' },
- attributes.map(({ key, label, help, checkedState, valueState }) => div(
- { class: 'flex-row fx-gap-3' },
- Checkbox({
- checked: checkedState,
- onChange: (checked) => checkedState.val = checked,
- }),
- div(
- {
- class: 'pb-4 flex-row',
- style: `min-width: ${width}px`,
- onclick: () => checkedState.val = true,
- },
- key === 'critical_data_element'
- ? RadioGroup({
- label, width,
- options: cdeOptions,
- onChange: (value) => valueState.val = value,
- })
- : Input({
- label, help, width,
- height: 32,
- placeholder: () => checkedState.val ? null : '(keep current values)',
- autocompleteOptions: tagOptions[key],
- onChange: (value) => valueState.val = value || null,
- }),
- ),
- )),
- div(
- { class: 'flex-row fx-justify-content-flex-end fx-gap-3 mt-4' },
- Button({
- type: 'stroked',
- label: 'Cancel',
- width: 'auto',
- onclick: () => multiEditMode.val = false,
- }),
- Button({
- type: 'stroked',
- color: 'primary',
- label: 'Save',
- width: 'auto',
- disabled: () => attributes.every(({ checkedState }) => !checkedState.val),
- onclick: () => {
- const items = selectedItems.val.reduce((array, table) => {
- const [ type, id ] = table.id.split('_');
- array.push({ type, id });
-
- table.children.forEach(column => {
- const [ type, id ] = column.id.split('_');
- array.push({ type, id });
- });
-
- return array;
- }, []);
-
- const tags = attributes.reduce((object, { key, checkedState, valueState }) => {
- if (checkedState.val) {
- object[key] = valueState.rawVal;
- }
- return object;
- }, {});
-
- emitEvent('TagsChanged', { payload: { items, tags } });
- // Don't set multiEditMode to false here
- // Otherwise this event gets superseded by the ItemSelected event
- // Let the Streamlit rerun handle the state reset with 'last_saved_timestamp'
- },
- }),
- ),
- ),
- })
- : ItemEmptyState(
- 'Select tables or columns on the left to edit their tags.',
- 'edit_document',
- ),
- );
-};
-
const ItemEmptyState = (/** @type string */ message, /** @type string */ icon) => {
return div(
{ class: 'flex-column fx-align-flex-center fx-justify-center tg-dh--no-selection' },
@@ -799,6 +644,18 @@ stylesheet.replace(`
background-color: var(--sidebar-background-color);
}
+.tg-dh--tree .tg-tree:not(.multi-select) .tg-tree--row.column {
+ margin-left: -30px;
+}
+
+.tg-dh--column-prefix {
+ display: inline-flex;
+ align-items: center;
+ justify-content: flex-end;
+ width: 34px;
+ flex-shrink: 0;
+}
+
.tg-dh--details {
padding-top: 8px;
overflow: auto;
diff --git a/testgen/ui/components/frontend/js/pages/edit_table_monitors.js b/testgen/ui/components/frontend/js/pages/edit_table_monitors.js
index 5fd564ae..c1edc25a 100644
--- a/testgen/ui/components/frontend/js/pages/edit_table_monitors.js
+++ b/testgen/ui/components/frontend/js/pages/edit_table_monitors.js
@@ -78,6 +78,11 @@ const EditTableMonitors = (/** @type Properties */ props) => {
onclick: () => selectedItem.val = { type: key, id: null },
},
span(label),
+ () => {
+ const id = getValue(props.definitions).find(td => td.test_type === key)?.id;
+ const state = formStates.val[id];
+ return state && !state.valid ? span({ class: 'text-error' }, ' *') : '';
+ },
)),
div({ class: 'edit-monitors--list-divider mt-3 mb-1' }),
div(
@@ -118,6 +123,10 @@ const EditTableMonitors = (/** @type Properties */ props) => {
span(
{ style: `text-overflow: ellipsis; ${!metric.column_name ? 'font-style: italic;' : ''}` },
metric.column_name || '(Unnamed Metric)',
+ () => {
+ const state = formStates.val[id];
+ return state && !state.valid ? span({ class: 'text-error' }, ' *') : '';
+ },
),
Button({
type: 'icon',
@@ -167,22 +176,26 @@ const EditTableMonitors = (/** @type Properties */ props) => {
...newMetrics.val,
[id]: { ...newMetrics.val[id], ...changes },
};
- } else {
+ formStates.val = { ...formStates.val, [id]: state };
+ } else if (state.dirty) {
updatedDefinitions.val = {
...updatedDefinitions.val,
- [id]: { ...changes, id },
+ [id]: { ...updatedDefinitions.rawVal[id], ...changes, id },
};
+ formStates.val = { ...formStates.val, [id]: state };
}
- formStates.val = { ...formStates.val, [id]: state };
},
});
}
const selectedDef = getValue(props.definitions).find(td => td.test_type === type);
if (!selectedDef) {
+ const message = type === 'Freshness_Trend'
+ ? 'Freshness monitor not yet configured. Run profiling to auto-generate.'
+ : 'Monitor not configured for this table.';
return Card({
class: 'edit-monitors--empty flex-row fx-justify-center',
- content: 'Monitor not configured for this table.',
+ content: message,
});
}
@@ -190,11 +203,13 @@ const EditTableMonitors = (/** @type Properties */ props) => {
definition: { ...selectedDef, ...updatedDefinitions.rawVal[selectedDef.id] },
class: 'edit-monitors--form',
onChange: (changes, state) => {
- updatedDefinitions.val = {
- ...updatedDefinitions.val,
- [selectedDef.id]: { ...changes, id: selectedDef.id },
- };
- formStates.val = { ...formStates.val, [selectedDef.id]: state };
+ if (state.dirty) {
+ updatedDefinitions.val = {
+ ...updatedDefinitions.val,
+ [selectedDef.id]: { ...updatedDefinitions.rawVal[selectedDef.id], ...changes, id: selectedDef.id },
+ };
+ formStates.val = { ...formStates.val, [selectedDef.id]: state };
+ }
},
});
},
diff --git a/testgen/ui/components/frontend/js/pages/import_metadata_dialog.js b/testgen/ui/components/frontend/js/pages/import_metadata_dialog.js
new file mode 100644
index 00000000..13084655
--- /dev/null
+++ b/testgen/ui/components/frontend/js/pages/import_metadata_dialog.js
@@ -0,0 +1,255 @@
+/**
+ * @typedef Properties
+ * @type {object}
+ * @property {object|null} preview
+ * @property {object|null} result
+ */
+import van from '../van.min.js';
+import { Streamlit } from '../streamlit.js';
+import { emitEvent, getValue, loadStylesheet, resizeFrameHeightToElement, resizeFrameHeightOnDOMChange } from '../utils.js';
+import { RadioGroup } from '../components/radio_group.js';
+import { FileInput } from '../components/file_input.js';
+import { Button } from '../components/button.js';
+import { Alert } from '../components/alert.js';
+import { Table } from '../components/table.js';
+import { capitalize } from '../display_utils.js';
+import { withTooltip } from '../components/tooltip.js';
+import { sizeLimit } from '../form_validators.js';
+
+const CSV_SIZE_LIMIT = 2 * 1024 * 1024; // 2 MB
+
+const { div, i, span } = van.tags;
+
+const ImportMetadataDialog = (/** @type Properties */ props) => {
+ loadStylesheet('import-metadata-dialog', stylesheet);
+ Streamlit.setFrameHeight(1);
+ window.testgen.isPage = true;
+
+ const wrapperId = 'import-metadata-wrapper';
+ resizeFrameHeightToElement(wrapperId);
+ resizeFrameHeightOnDOMChange(wrapperId);
+
+ const blankBehavior = van.state('keep');
+ const fileValue = van.state(null);
+
+ return div(
+ { id: wrapperId, class: 'flex-column fx-gap-4' },
+ FileInput({
+ name: 'csv_file',
+ label: 'Upload metadata CSV file',
+ help: 'Use the Export menu on the Data Catalog page to download the current metadata as a CSV template.',
+ validators: [sizeLimit(CSV_SIZE_LIMIT)],
+ value: fileValue,
+ onChange: (value) => {
+ fileValue.val = value;
+ if (value?.content) {
+ emitEvent('FileUploaded', {
+ payload: {
+ content: value.content,
+ blank_behavior: blankBehavior.val,
+ },
+ });
+ } else {
+ emitEvent('FileCleared', {});
+ }
+ },
+ }),
+ RadioGroup({
+ label: 'When CSV values are blank',
+ help: 'Controls whether blank cells in the CSV overwrite existing metadata or leave it unchanged.',
+ options: [
+ { label: 'Keep existing values', value: 'keep' },
+ { label: 'Clear existing values', value: 'clear' },
+ ],
+ value: blankBehavior,
+ onChange: (value) => blankBehavior.val = value,
+ layout: 'default',
+ }),
+ () => {
+ const result = getValue(props.result);
+ if (result) {
+ return Alert(
+ { type: result.success ? 'success' : 'error', icon: result.success ? 'check_circle' : 'error' },
+ span(result.message),
+ );
+ }
+
+ const preview = getValue(props.preview);
+ if (!preview) {
+ return '';
+ }
+
+ const hasError = !!preview.error;
+ const tableCount = hasError ? 0 : (preview.table_count || 0);
+ const columnCount = hasError ? 0 : (preview.column_count || 0);
+ const skippedCount = hasError ? 0 : (preview.skipped_count || 0);
+ const hasMatches = tableCount + columnCount > 0;
+
+ const plural = (n, word) => `${n} ${n === 1 ? word : word + 's'}`;
+ const importedParts = [
+ tableCount ? plural(tableCount, 'table') : '',
+ columnCount ? plural(columnCount, 'column') : '',
+ ].filter(Boolean);
+ const importedText = importedParts.length
+ ? `Metadata for ${importedParts.join(', ')} will be imported`
+ : 'No metadata will be imported';
+ const skippedText = skippedCount ? `${plural(skippedCount, 'row')} skipped` : '';
+ const summaryText = [importedText, skippedText].filter(Boolean).join(' | ');
+
+ return div(
+ { class: 'flex-column fx-gap-3' },
+ hasError
+ ? ''
+ : span(
+ { class: 'text-secondary' },
+ summaryText,
+ ),
+ hasError
+ ? Alert({ type: 'error', icon: 'error' }, span(preview.error))
+ : PreviewTable(preview),
+ preview.pii_skipped
+ ? Alert(
+ { type: 'info', icon: 'info' },
+ 'PII data in this CSV will be ignored because you do not have permission to edit PII flags.',
+ )
+ : null,
+ preview.warn_cde || preview.warn_pii
+ ? Alert(
+ { type: 'warn', icon: 'warning' },
+ `This table group is currently configured to detect ${preview.warn_cde ? 'CDEs' : ''}${preview.warn_cde && preview.warn_pii ? ' and ' : ''}${preview.warn_pii ? 'PIIs' : ''} during profiling.
+ To preserve your imported edits, autodetection will be turned off.`,
+ )
+ : null,
+ div(
+ { class: 'flex-row fx-justify-content-flex-end' },
+ Button({
+ type: 'stroked',
+ color: 'primary',
+ label: 'Import Metadata',
+ icon: 'upload',
+ width: 'auto',
+ disabled: !hasMatches,
+ onclick: () => emitEvent('ImportConfirmed', {}),
+ }),
+ ),
+ );
+ },
+ );
+};
+
+const STATUS_ICONS = {
+ ok: 'check_circle',
+ warning: 'warning',
+ error: 'error',
+ unmatched: 'block',
+};
+
+const COLUMN_LABELS = {
+ critical_data_element: 'CDE',
+ excluded_data_element: 'XDE',
+ pii_flag: 'PII',
+};
+
+const PreviewTable = (preview) => {
+ const metadataColumns = preview.metadata_columns || [];
+ const previewRows = preview.preview_rows || [];
+
+ const columns = [
+ { name: '_status_icon', label: '', width: 32, overflow: 'visible' },
+ { name: 'table_name', label: 'Table', width: 150 },
+ { name: 'column_name', label: 'Column', width: 150 },
+ ...metadataColumns.map(col => ({
+ name: col,
+ label: COLUMN_LABELS[col] ?? capitalize(col.replaceAll('_', ' ')),
+ width: col === 'description' ? 200 : 120,
+ })),
+ ];
+
+ const rows = previewRows.map(row => {
+ const status = row._status || 'ok';
+ const icon = STATUS_ICONS[status] || STATUS_ICONS.ok;
+ const truncatedFields = row._truncated_fields || [];
+
+ const statusIcon = i(
+ {
+ class: `material-symbols-rounded import-status-${status}`,
+ style: 'font-size: 16px; cursor: default; overflow: visible; position: relative',
+ },
+ icon,
+ );
+
+ const tableRow = {
+ _status: status,
+ _status_icon: row._status_detail
+ ? withTooltip(statusIcon, { text: row._status_detail, position: 'right', width: 200 })
+ : statusIcon,
+ table_name: row.table_name ?? '',
+ column_name: row.column_name ?? '',
+ };
+
+ for (const col of metadataColumns) {
+ let val = row[col] ?? '';
+ if (truncatedFields.includes(col) && val) {
+ val += '\u2026';
+ }
+ tableRow[col] = val;
+ }
+
+ return tableRow;
+ });
+
+ return Table(
+ {
+ columns,
+ height: Math.min(300, 40 + rows.length * 40),
+ highDensity: true,
+ rowClass: (row) => {
+ if (row._status === 'unmatched') return 'import-row-unmatched';
+ if (row._status === 'error') return 'import-row-error';
+ if (row._status === 'warning') return 'import-row-warning';
+ return '';
+ },
+ },
+ rows,
+ );
+};
+
+const stylesheet = new CSSStyleSheet();
+stylesheet.replace(`
+.import-status-ok {
+ color: var(--primary-color);
+}
+
+.import-status-warning {
+ color: var(--orange);
+}
+
+.import-status-error {
+ color: var(--error-color);
+}
+
+.import-status-unmatched {
+ color: var(--disabled-text-color);
+}
+
+.import-row-unmatched > td {
+ background-color: rgba(0, 0, 0, 0.03);
+ color: var(--disabled-text-color);
+}
+
+.import-row-error > td {
+ background-color: color-mix(in srgb, var(--error-color) 5%, transparent);
+}
+
+.import-row-warning > td {
+ background-color: color-mix(in srgb, var(--orange) 8%, transparent);
+}
+
+@media (prefers-color-scheme: dark) {
+ .import-row-unmatched > td {
+ background-color: rgba(255, 255, 255, 0.03);
+ }
+}
+`);
+
+export { ImportMetadataDialog };
diff --git a/testgen/ui/components/frontend/js/pages/monitors_dashboard.js b/testgen/ui/components/frontend/js/pages/monitors_dashboard.js
index b5294681..e8beabb9 100644
--- a/testgen/ui/components/frontend/js/pages/monitors_dashboard.js
+++ b/testgen/ui/components/frontend/js/pages/monitors_dashboard.js
@@ -254,6 +254,7 @@ const MonitorsDashboard = (/** @type Properties */ props) => {
type: 'icon',
tooltip: 'View table trends',
tooltipPosition: 'top-left',
+ disabled: monitor.freshness_is_pending && monitor.volume_is_pending && monitor.schema_is_pending && monitor.metric_is_pending,
style: 'color: var(--secondary-text-color);',
onclick: () => openChartsDialog(monitor),
}),
diff --git a/testgen/ui/components/frontend/js/pages/notification_settings.js b/testgen/ui/components/frontend/js/pages/notification_settings.js
index 570115de..a72be6d4 100644
--- a/testgen/ui/components/frontend/js/pages/notification_settings.js
+++ b/testgen/ui/components/frontend/js/pages/notification_settings.js
@@ -66,7 +66,7 @@ const NotificationSettings = (/** @type Properties */ props) => {
class: 'notifications--empty',
link: {
label: 'View documentation',
- href: 'https://docs.datakitchen.io/articles/dataops-testgen-help/configure-email-server',
+ href: 'https://docs.datakitchen.io/testgen/administer/configure-email-server/',
open_new: true,
},
});
@@ -209,8 +209,8 @@ const NotificationSettings = (/** @type Properties */ props) => {
),
duplicatedMessage
? div(
- { class: 'flex-row fx-gap-1 text-caption warning-text' },
- Icon({ size: 12, classes: 'warning-text' }, 'warning'),
+ { class: 'flex-row fx-gap-1 text-caption text-warning' },
+ Icon({ size: 12, classes: 'text-warning' }, 'warning'),
span({}, duplicatedMessage),
)
: '',
diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js
index d166795c..e041b5d1 100644
--- a/testgen/ui/components/frontend/js/pages/profiling_runs.js
+++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js
@@ -48,7 +48,7 @@ import { Link } from '../components/link.js';
import { Button } from '../components/button.js';
import { Streamlit } from '../streamlit.js';
import { emitEvent, getValue, loadStylesheet, resizeFrameHeightToElement, resizeFrameHeightOnDOMChange } from '../utils.js';
-import { formatTimestamp, formatDuration, formatNumber } from '../display_utils.js';
+import { formatTimestamp, formatDuration, formatNumber, DISABLED_ACTION_TEXT } from '../display_utils.js';
import { Checkbox } from '../components/checkbox.js';
import { Select } from '../components/select.js';
import { Paginator } from '../components/paginator.js';
@@ -190,7 +190,7 @@ const ProfilingRuns = (/** @type Properties */ props) => {
),
),
div(
- paginatedRuns.val.map(item => ProfilingRunItem(item, columns, selectedRuns[item.id], userCanEdit)),
+ paginatedRuns.val.map(item => ProfilingRunItem(item, columns, selectedRuns[item.id], userCanEdit, projectSummary.project_code)),
),
),
Paginator({
@@ -280,6 +280,7 @@ const ProfilingRunItem = (
/** @type string[] */ columns,
/** @type boolean */ selected,
/** @type boolean */ userCanEdit,
+ /** @type string */ projectCode,
) => {
const runningStep = item.progress?.find((item) => item.status === 'Running');
@@ -363,7 +364,7 @@ const ProfilingRunItem = (
item.status === 'Complete' && item.column_ct ? Link({
label: 'View results',
href: 'profiling-runs:results',
- params: { 'run_id': item.id },
+ params: { 'run_id': item.id, 'project_code': projectCode },
underline: true,
right_icon: 'chevron_right',
}) : null,
@@ -381,7 +382,7 @@ const ProfilingRunItem = (
item.anomaly_ct ? Link({
label: `View ${item.anomaly_ct} issues`,
href: 'profiling-runs:hygiene',
- params: { 'run_id': item.id },
+ params: { 'run_id': item.id, 'project_code': projectCode },
underline: true,
right_icon: 'chevron_right',
style: 'margin-top: 4px;',
diff --git a/testgen/ui/components/frontend/js/pages/project_dashboard.js b/testgen/ui/components/frontend/js/pages/project_dashboard.js
index 292b0aba..79cb9c02 100644
--- a/testgen/ui/components/frontend/js/pages/project_dashboard.js
+++ b/testgen/ui/components/frontend/js/pages/project_dashboard.js
@@ -121,8 +121,8 @@ const ProjectDashboard = (/** @type Properties */ props) => {
{ class: 'flex-column mt-4' },
getValue(filteredTableGroups).map(tableGroup =>
tableGroup.monitoring_summary
- ? TableGroupCardWithMonitor(tableGroup)
- : TableGroupCard(tableGroup)
+ ? TableGroupCardWithMonitor(tableGroup, getValue(props.project_summary)?.project_code)
+ : TableGroupCard(tableGroup, getValue(props.project_summary)?.project_code)
)
)
: div(
@@ -133,7 +133,7 @@ const ProjectDashboard = (/** @type Properties */ props) => {
);
}
-const TableGroupCard = (/** @type TableGroupSummary */ tableGroup) => {
+const TableGroupCard = (/** @type TableGroupSummary */ tableGroup, /** @type string */ projectCode) => {
const useApprox = tableGroup.record_ct === null || tableGroup.record_ct === undefined;
return Card({
@@ -158,12 +158,12 @@ const TableGroupCard = (/** @type TableGroupSummary */ tableGroup) => {
${formatNumber(useApprox ? tableGroup.approx_data_point_ct : tableGroup.data_point_ct)} data points
${useApprox ? '*' : ''}`,
),
- TableGroupTestSuiteSummary(tableGroup.test_suites),
+ TableGroupTestSuiteSummary(tableGroup.test_suites, projectCode),
),
ScoreMetric(tableGroup.dq_score, tableGroup.dq_score_profiling, tableGroup.dq_score_testing),
),
hr({ class: 'tg-overview--table-group-divider' }),
- TableGroupLatestProfile(tableGroup),
+ TableGroupLatestProfile(tableGroup, projectCode),
useApprox
? span({ class: 'text-caption text-right' }, '* Approximate counts based on server statistics')
: null,
@@ -171,7 +171,7 @@ const TableGroupCard = (/** @type TableGroupSummary */ tableGroup) => {
});
};
-const TableGroupCardWithMonitor = (/** @type TableGroupSummary */ tableGroup) => {
+const TableGroupCardWithMonitor = (/** @type TableGroupSummary */ tableGroup, /** @type string */ projectCode) => {
const useApprox = tableGroup.record_ct === null || tableGroup.record_ct === undefined;
return Card({
testId: 'table-group-summary-card',
@@ -205,9 +205,9 @@ const TableGroupCardWithMonitor = (/** @type TableGroupSummary */ tableGroup) =>
),
hr({ class: 'tg-overview--table-group-divider' }),
- TableGroupTestSuiteSummary(tableGroup.test_suites),
+ TableGroupTestSuiteSummary(tableGroup.test_suites, projectCode),
hr({ class: 'tg-overview--table-group-divider' }),
- TableGroupLatestProfile(tableGroup),
+ TableGroupLatestProfile(tableGroup, projectCode),
useApprox
? span({ class: 'text-caption text-right' }, '* Approximate counts based on server statistics')
: null,
@@ -215,7 +215,7 @@ const TableGroupCardWithMonitor = (/** @type TableGroupSummary */ tableGroup) =>
});
};
-const TableGroupLatestProfile = (/** @type TableGroupSummary */ tableGroup) => {
+const TableGroupLatestProfile = (/** @type TableGroupSummary */ tableGroup, /** @type string */ projectCode) => {
if (!tableGroup.latest_profile_start) {
return div(
{ class: 'mt-1 mb-1 text-secondary' },
@@ -233,7 +233,7 @@ const TableGroupLatestProfile = (/** @type TableGroupSummary */ tableGroup) => {
Link({
label: formatTimestamp(tableGroup.latest_profile_start),
href: 'profiling-runs:results',
- params: { run_id: tableGroup.latest_profile_id },
+ params: { run_id: tableGroup.latest_profile_id, project_code: projectCode },
}),
daysAgo > staleProfileDays
? span({ class: 'text-error' }, `(${daysAgo} days ago)`)
@@ -246,6 +246,7 @@ const TableGroupLatestProfile = (/** @type TableGroupSummary */ tableGroup) => {
href: 'profiling-runs:hygiene',
params: {
run_id: tableGroup.latest_profile_id,
+ project_code: projectCode,
},
width: 150,
style: 'flex: 0 0 auto;',
@@ -264,7 +265,7 @@ const TableGroupLatestProfile = (/** @type TableGroupSummary */ tableGroup) => {
);
};
-const TableGroupTestSuiteSummary = (/** @type TestSuiteSummary[] */testSuites) => {
+const TableGroupTestSuiteSummary = (/** @type TestSuiteSummary[] */testSuites, /** @type string */ projectCode) => {
if (!testSuites?.length) {
return div(
{ class: 'mt-1 mb-1 text-secondary' },
@@ -287,7 +288,7 @@ const TableGroupTestSuiteSummary = (/** @type TestSuiteSummary[] */testSuites) =
Link({
label: suite.test_suite,
href: 'test-suites:definitions',
- params: { test_suite_id: suite.id },
+ params: { test_suite_id: suite.id, project_code: projectCode },
}),
span({ class: 'text-caption' }, `${suite.test_ct ?? 0} tests`),
),
@@ -295,7 +296,7 @@ const TableGroupTestSuiteSummary = (/** @type TestSuiteSummary[] */testSuites) =
? Link({
label: formatTimestamp(suite.latest_run_start),
href: 'test-runs:results',
- params: { run_id: suite.latest_run_id },
+ params: { run_id: suite.latest_run_id, project_code: projectCode },
style: 'flex: 1 1 25%;',
})
: span({ style: 'flex: 1 1 25%;' }, '--'),
diff --git a/testgen/ui/components/frontend/js/pages/quality_dashboard.js b/testgen/ui/components/frontend/js/pages/quality_dashboard.js
index 3378f21e..1637942a 100644
--- a/testgen/ui/components/frontend/js/pages/quality_dashboard.js
+++ b/testgen/ui/components/frontend/js/pages/quality_dashboard.js
@@ -81,7 +81,7 @@ const QualityDashboard = (/** @type {Properties} */ props) => {
right_icon: 'chevron_right',
href: 'quality-dashboard:score-details',
class: 'ml-4',
- params: { definition_id: score.id },
+ params: { definition_id: score.id, project_code: getValue(props.project_summary)?.project_code },
}),
{showHistory: true},
))
diff --git a/testgen/ui/components/frontend/js/pages/score_details.js b/testgen/ui/components/frontend/js/pages/score_details.js
index d80e7290..1bffa5c0 100644
--- a/testgen/ui/components/frontend/js/pages/score_details.js
+++ b/testgen/ui/components/frontend/js/pages/score_details.js
@@ -62,7 +62,7 @@ const ScoreDetails = (/** @type {Properties} */ props) => {
return userCanEdit ? div(
{ class: 'flex-row tg-test-suites--card-actions' },
Button({ type: 'icon', icon: 'notifications', tooltip: 'Configure Notifications', onclick: () => emitEvent('EditNotifications', {}) }),
- Button({ type: 'icon', icon: 'edit', tooltip: 'Edit Scorecard', onclick: () => emitEvent('LinkClicked', { href: 'quality-dashboard:explorer', params: { definition_id: score.id } }) }),
+ Button({ type: 'icon', icon: 'edit', tooltip: 'Edit Scorecard', onclick: () => emitEvent('LinkClicked', { href: 'quality-dashboard:explorer', params: { definition_id: score.id, project_code: score.project_code } }) }),
Button({ type: 'icon', icon: 'delete', tooltip: 'Delete Scorecard', onclick: () => emitEvent('DeleteScoreRequested', { payload: score.id }) }),
) : '';
},
@@ -86,7 +86,7 @@ const ScoreDetails = (/** @type {Properties} */ props) => {
getValue(props.score_type),
getValue(props.category),
getValue(props.drilldown),
- (project_code, name, score_type, category) => emitEvent('LinkClicked', { href: 'quality-dashboard:score-details', params: { definition_id: scoreId, score_type, category } }),
+ (project_code, name, score_type, category) => emitEvent('LinkClicked', { href: 'quality-dashboard:score-details', params: { definition_id: scoreId, project_code, score_type, category } }),
)
: ScoreBreakdown(
props.score,
@@ -95,7 +95,7 @@ const ScoreDetails = (/** @type {Properties} */ props) => {
props.score_type,
(project_code, name, score_type, category, drilldown) => emitEvent(
'LinkClicked',
- { href: 'quality-dashboard:score-details', params: { definition_id: scoreId, score_type, category, drilldown }
+ { href: 'quality-dashboard:score-details', params: { definition_id: scoreId, project_code, score_type, category, drilldown }
}),
)
);
diff --git a/testgen/ui/components/frontend/js/pages/score_explorer.js b/testgen/ui/components/frontend/js/pages/score_explorer.js
index 7bd64e02..55efd129 100644
--- a/testgen/ui/components/frontend/js/pages/score_explorer.js
+++ b/testgen/ui/components/frontend/js/pages/score_explorer.js
@@ -435,7 +435,7 @@ const Toolbar = (
let params = {project_code: definition.project_code};
if (!isNew_) {
href = 'quality-dashboard:score-details';
- params = {definition_id: definition.id};
+ params = {definition_id: definition.id, project_code: definition.project_code};
}
return Button({
diff --git a/testgen/ui/components/frontend/js/pages/table_group_wizard.js b/testgen/ui/components/frontend/js/pages/table_group_wizard.js
index 48ef56b3..1c7b0ad2 100644
--- a/testgen/ui/components/frontend/js/pages/table_group_wizard.js
+++ b/testgen/ui/components/frontend/js/pages/table_group_wizard.js
@@ -4,6 +4,10 @@
* @import { TableGroup } from '../components/table_group_form.js'
* @import { CronSample } from '../types.js'
*
+ * @typedef Permissions
+ * @type {object}
+ * @property {boolean} can_view_pii
+ *
* @typedef WizardResult
* @type {object}
* @property {boolean} success
@@ -20,6 +24,7 @@
* @property {Connection[]} connections
* @property {string[]?} steps
* @property {boolean?} is_in_use
+ * @property {Permissions} permissions
* @property {TableGroupPreview?} table_group_preview
* @property {CronSample?} standard_cron_sample
* @property {CronSample?} monitor_cron_sample
@@ -141,37 +146,41 @@ const TableGroupWizard = (props) => {
return '';
}
- return WizardProgressIndicator(
- [
+ const allIndicators = [
{
- index: 1,
title: 'Table Group',
skipped: false,
includedSteps: ['tableGroup', 'testTableGroup'],
},
{
- index: 2,
title: 'Profiling',
skipped: !stepsState.runProfiling.rawVal,
includedSteps: ['runProfiling'],
},
{
- index: 3,
title: 'Testing',
skipped: !stepsState.testSuite.rawVal.generate,
includedSteps: ['testSuite'],
},
{
- index: 4,
title: 'Monitors',
skipped: !stepsState.monitorSuite.rawVal.generate,
includedSteps: ['monitorSuite'],
},
- ],
+ ].filter(indicator => indicator.includedSteps.some(s => steps.includes(s)))
+ .map((indicator, i) => ({ ...indicator, index: i + 1 }));
+
+ if (allIndicators.length <= 1) {
+ return '';
+ }
+
+ return WizardProgressIndicator(
+ allIndicators,
{
index: stepIndex,
name: steps[stepIndex],
},
+ (stepName) => setStep(steps.indexOf(stepName)),
);
},
WizardStep(0, currentStepIndex, () => {
@@ -189,6 +198,7 @@ const TableGroupWizard = (props) => {
showConnectionSelector: connections.length > 1,
disableConnectionSelector: false,
disableSchemaField: props.is_in_use ?? false,
+ disablePiiFlag: !getValue(props.permissions)?.can_view_pii,
onChange: (updatedTableGroup, state) => {
stepsState.tableGroup.val = updatedTableGroup;
stepsValidity.tableGroup.val = state.valid;
diff --git a/testgen/ui/components/frontend/js/pages/test_definition_notes.js b/testgen/ui/components/frontend/js/pages/test_definition_notes.js
new file mode 100644
index 00000000..91cc9f48
--- /dev/null
+++ b/testgen/ui/components/frontend/js/pages/test_definition_notes.js
@@ -0,0 +1,266 @@
+/**
+ * @typedef Note
+ * @type {object}
+ * @property {string} id
+ * @property {string} detail
+ * @property {string} created_by
+ * @property {string?} created_at
+ * @property {string?} updated_at
+ *
+ * @typedef Properties
+ * @type {object}
+ * @property {{table: string, column: string, test: string}} test_label
+ * @property {Array} notes
+ * @property {string} current_user
+ */
+import van from '../van.min.js';
+import { Button } from '../components/button.js';
+import { Icon } from '../components/icon.js';
+import { Streamlit } from '../streamlit.js';
+import { emitEvent, getValue, loadStylesheet } from '../utils.js';
+import { ExpansionPanel } from '../components/expansion_panel.js';
+import { formatTimestamp } from '../display_utils.js';
+
+const minHeight = 400;
+const { div, span, textarea, p } = van.tags;
+
+/**
+ * @param {Properties} props
+ * @returns
+ */
+const TestDefinitionNotes = (props) => {
+ loadStylesheet('test-definition-notes', stylesheet);
+ window.testgen.isPage = true;
+
+ // Form state: shared between add and edit modes
+ const editNoteId = van.state(null);
+ const noteText = van.state('');
+ const isEdit = van.state(false);
+
+ const resetForm = () => {
+ editNoteId.val = null;
+ noteText.val = '';
+ isEdit.val = false;
+ };
+
+ /**
+ * @param {Note} note
+ * @param {string} currentUser
+ * @returns
+ */
+ const NoteItem = (note, currentUser) => {
+ const confirmingDelete = van.state(false);
+ const isOwner = note.created_by === currentUser;
+
+ return div(
+ { class: () => `tdn-note ${isEdit.val && editNoteId.val === note.id ? 'tdn-editing' : ''}` },
+ div(
+ { class: 'flex-row fx-gap-2' },
+ span({ class: 'text-bold text-small' }, `@${note.created_by}`),
+ span({ class: 'tdn-note-separator' }, '\u2014'),
+ span({ class: 'tdn-note-date' },
+ formatTimestamp(new Date(note.created_at), true),
+ note.updated_at ? ' (edited)' : '',
+ ),
+ isOwner ? div(
+ { class: 'tdn-note-actions' },
+ () => {
+ if (isEdit.val && editNoteId.val === note.id) {
+ return div(
+ { class: 'flex-row fx-gap-1 fx-align-center' },
+ Icon({ size: 18, classes: 'tdn-editing-indicator' }, 'edit'),
+ span({ class: 'tdn-editing-indicator text-caption' }, 'Editing'),
+ );
+ }
+ if (confirmingDelete.val) {
+ return div(
+ { class: 'flex-row fx-gap-1 fx-align-center' },
+ span({ class: 'text-caption' }, 'Delete?'),
+ Button({
+ label: 'Yes',
+ type: 'stroked',
+ color: 'warn',
+ onclick: () => emitEvent('NoteDeleted', { payload: { id: note.id } }),
+ }),
+ Button({
+ label: 'No',
+ type: 'stroked',
+ color: 'basic',
+ onclick: () => { confirmingDelete.val = false; },
+ }),
+ );
+ }
+ return div(
+ { class: 'flex-row fx-gap-1' },
+ Button({
+ type: 'icon',
+ icon: 'edit',
+ tooltip: 'Edit note',
+ onclick: () => {
+ isEdit.val = true;
+ editNoteId.val = note.id;
+ noteText.val = note.detail;
+ },
+ }),
+ Button({
+ type: 'icon',
+ icon: 'delete',
+ tooltip: 'Delete note',
+ tooltipPosition: 'top-left',
+ onclick: () => { confirmingDelete.val = true; },
+ }),
+ );
+ },
+ ) : null,
+ ),
+ p({ class: 'tdn-note-detail' }, note.detail),
+ );
+ };
+
+ return div(
+ { id: 'test-definition-notes', class: 'flex-column fx-gap-2', style: 'height: 100%; overflow-y: auto;' },
+ () => {
+ const label = getValue(props.test_label);
+ return div(
+ { class: 'flex-row fx-flex-wrap fx-gap-1' },
+ span({ class: 'text-secondary' }, 'Table: '), span(label.table),
+ span({ class: 'tdn-separator' }, '|'),
+ span({ class: 'text-secondary' }, 'Column: '), span(label.column),
+ span({ class: 'tdn-separator' }, '|'),
+ span({ class: 'text-secondary' }, 'Test: '), span(label.test),
+ );
+ },
+ () => ExpansionPanel(
+ {
+ title: isEdit.val
+ ? span({ class: 'tdn-editing-indicator' }, 'Edit Note')
+ : span({ class: 'text-green' }, 'Add Note'),
+ expanded: isEdit.val || getValue(props.notes).length === 0,
+ },
+ div(
+ { class: 'flex-column' },
+ textarea({
+ class: 'tdn-form-textarea',
+ placeholder: 'Type a note...',
+ value: noteText,
+ oninput: (e) => noteText.val = e.target.value,
+ rows: 3,
+ }),
+ div(
+ { class: 'flex-row fx-justify-content-flex-end fx-gap-2 mt-3' },
+ () => isEdit.val
+ ? Button({
+ type: 'stroked',
+ label: 'Cancel',
+ width: 'auto',
+ onclick: resetForm,
+ })
+ : '',
+ Button({
+ type: 'stroked',
+ label: isEdit.val ? 'Save Changes' : 'Add Note',
+ width: 'auto',
+ disabled: () => !noteText.val.trim(),
+ onclick: () => {
+ const text = noteText.rawVal.trim();
+ if (isEdit.rawVal) {
+ const id = editNoteId.rawVal;
+ resetForm();
+ emitEvent('NoteUpdated', { payload: { id, text } });
+ } else {
+ resetForm();
+ emitEvent('NoteAdded', { payload: { text } });
+ }
+ },
+ }),
+ ),
+ ),
+ ),
+ () => {
+ const notes = getValue(props.notes);
+ const currentUser = getValue(props.current_user);
+ Streamlit.setFrameHeight(Math.max(minHeight, 80 * notes.length + 200));
+
+ return notes.length > 0
+ ? div(
+ { class: 'flex-column fx-gap-2' },
+ ...notes.map(note => NoteItem(note, currentUser)),
+ )
+ : div(
+ { class: 'flex-column fx-gap-2 fx-align-flex-center mt-7 text-secondary' },
+ span({ class: 'text-large' }, 'No notes yet'),
+ span('Document context, decisions, or issues related to this test definition.'),
+ );
+ },
+ );
+};
+
+const stylesheet = new CSSStyleSheet();
+stylesheet.replace(`
+.tdn-separator {
+ color: var(--disabled-text-color);
+ margin: 0 4px;
+}
+.tdn-form-textarea {
+ box-sizing: border-box;
+ width: 100%;
+ border-radius: 8px;
+ border: 1px solid transparent;
+ transition: border-color 0.3s;
+ background-color: var(--form-field-color);
+ padding: 8px 12px;
+ color: var(--primary-text-color);
+ font-family: inherit;
+ font-size: 14px;
+ resize: vertical;
+}
+.tdn-form-textarea:focus,
+.tdn-form-textarea:focus-visible {
+ outline: none;
+ border-color: var(--primary-color);
+}
+.tdn-form-textarea::placeholder {
+ font-style: italic;
+ color: var(--disabled-text-color);
+}
+.tdn-note {
+ padding: 4px 12px 12px;
+ border-radius: 8px;
+ background-color: var(--app-background-color);
+}
+@media (prefers-color-scheme: dark) {
+ .tdn-note {
+ background-color: var(--dk-card-background);
+ }
+}
+.tdn-note.tdn-editing {
+ background-color: var(--select-hover-background);
+}
+.tdn-editing-indicator {
+ color: var(--purple);
+}
+.tdn-note-separator {
+ color: var(--disabled-text-color);
+ font-size: 12px;
+}
+.tdn-note-date {
+ font-size: 12px;
+ color: var(--secondary-text-color);
+}
+.tdn-note-actions {
+ display: flex;
+ flex-direction: row;
+ align-items: center;
+ margin-left: auto;
+ gap: 2px;
+}
+.tdn-note-detail {
+ margin: 0;
+ font-size: 14px;
+ line-height: 1.5;
+ color: var(--primary-text-color);
+ white-space: pre-wrap;
+}
+`);
+
+export { TestDefinitionNotes };
diff --git a/testgen/ui/components/frontend/js/pages/test_runs.js b/testgen/ui/components/frontend/js/pages/test_runs.js
index f05fa5ad..20e60bea 100644
--- a/testgen/ui/components/frontend/js/pages/test_runs.js
+++ b/testgen/ui/components/frontend/js/pages/test_runs.js
@@ -47,7 +47,7 @@ import { Link } from '../components/link.js';
import { Button } from '../components/button.js';
import { Streamlit } from '../streamlit.js';
import { emitEvent, getValue, loadStylesheet, resizeFrameHeightToElement, resizeFrameHeightOnDOMChange } from '../utils.js';
-import { formatTimestamp, formatDuration } from '../display_utils.js';
+import { formatTimestamp, formatDuration, DISABLED_ACTION_TEXT } from '../display_utils.js';
import { Checkbox } from '../components/checkbox.js';
import { Select } from '../components/select.js';
import { Paginator } from '../components/paginator.js';
@@ -185,7 +185,7 @@ const TestRuns = (/** @type Properties */ props) => {
),
),
div(
- paginatedRuns.val.map(item => TestRunItem(item, columns, selectedRuns[item.test_run_id], userCanEdit)),
+ paginatedRuns.val.map(item => TestRunItem(item, columns, selectedRuns[item.test_run_id], userCanEdit, projectSummary.project_code)),
),
),
Paginator({
@@ -287,6 +287,7 @@ const TestRunItem = (
/** @type string[] */ columns,
/** @type boolean */ selected,
/** @type boolean */ userCanEdit,
+ /** @type string */ projectCode,
) => {
const runningStep = item.progress?.find((item) => item.status === 'Running');
@@ -307,7 +308,7 @@ const TestRunItem = (
Link({
label: formatTimestamp(item.test_starttime),
href: 'test-runs:results',
- params: { 'run_id': item.test_run_id },
+ params: { 'run_id': item.test_run_id, 'project_code': projectCode },
underline: true,
}),
div(
diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js
index a08d4770..abd95965 100644
--- a/testgen/ui/components/frontend/js/pages/test_suites.js
+++ b/testgen/ui/components/frontend/js/pages/test_suites.js
@@ -11,12 +11,14 @@
* @property {ProjectSummary} project_summary
* @property {TestSuiteSummary} test_suites
* @property {FilterOption[]} table_group_filter_options
+ * @property {string?} test_suite_name
* @property {Permissions} permissions
*/
import van from '../van.min.js';
import { Streamlit } from '../streamlit.js';
import { emitEvent, getValue, loadStylesheet, resizeFrameHeightToElement, resizeFrameHeightOnDOMChange } from '../utils.js';
import { formatTimestamp, DISABLED_ACTION_TEXT } from '../display_utils.js';
+import { Input } from '../components/input.js';
import { Select } from '../components/select.js';
import { Button } from '../components/button.js';
import { Card } from '../components/card.js';
@@ -46,51 +48,77 @@ const TestSuites = (/** @type Properties */ props) => {
return projectSummary.test_suite_count > 0
? div(
{ class: 'tg-test-suites'},
- () => div(
- { class: 'flex-row fx-align-flex-end fx-justify-space-between fx-gap-4 mb-4' },
- Select({
- label: 'Table Group',
- value: getValue(props.table_group_filter_options)?.find((op) => op.selected)?.value ?? null,
- options: getValue(props.table_group_filter_options) ?? [],
- allowNull: true,
- style: 'font-size: 14px;',
- testId: 'table-group-filter',
- onChange: (value) => emitEvent('FilterApplied', {payload: value}),
- }),
- div(
- { class: 'flex-row fx-gap-3' },
- Button({
- icon: 'notifications',
- type: 'stroked',
- label: 'Notifications',
- tooltip: 'Configure email notifications for test runs',
- tooltipPosition: 'bottom',
- width: 'fit-content',
- style: 'background: var(--button-generic-background-color);',
- onclick: () => emitEvent('RunNotificationsClicked', {}),
- }),
- Button({
- icon: 'today',
- type: 'stroked',
- label: 'Schedules',
- tooltip: 'Manage when test suites should run',
- tooltipPosition: 'bottom',
- width: 'fit-content',
- style: 'background: var(--button-generic-background-color);',
- onclick: () => emitEvent('RunSchedulesClicked', {}),
- }),
- userCanEdit
- ? Button({
- icon: 'add',
+ () => {
+ const initialTableGroup = getValue(props.table_group_filter_options)?.find((op) => op.selected)?.value ?? null;
+ const initialTestSuiteName = getValue(props.test_suite_name) || null;
+ const selectedTableGroup = van.state(initialTableGroup);
+ const testSuiteNameFilter = van.state(initialTestSuiteName);
+
+ van.derive(() => {
+ if (selectedTableGroup.val !== initialTableGroup || testSuiteNameFilter.val !== initialTestSuiteName) {
+ emitEvent('FilterApplied', { payload: { table_group_id: selectedTableGroup.val, test_suite_name: testSuiteNameFilter.val } });
+ }
+ });
+
+ return div(
+ { class: 'flex-row fx-align-flex-end fx-justify-space-between fx-gap-4 fx-flex-wrap mb-4' },
+ div(
+ { class: 'flex-row fx-align-flex-end fx-gap-3' },
+ Select({
+ label: 'Table Group',
+ value: selectedTableGroup,
+ options: getValue(props.table_group_filter_options) ?? [],
+ allowNull: true,
+ style: 'font-size: 14px;',
+ testId: 'table-group-filter',
+ onChange: (value) => selectedTableGroup.val = value,
+ }),
+ Input({
+ testId: 'test-suite-name-filter',
+ icon: 'search',
+ label: '',
+ placeholder: 'Search test suite names',
+ width: 300,
+ clearable: true,
+ value: testSuiteNameFilter,
+ onChange: (value) => testSuiteNameFilter.val = value || null,
+ }),
+ ),
+ div(
+ { class: 'flex-row fx-gap-3' },
+ Button({
+ icon: 'notifications',
type: 'stroked',
- label: 'Add Test Suite',
+ label: 'Notifications',
+ tooltip: 'Configure email notifications for test runs',
+ tooltipPosition: 'bottom',
width: 'fit-content',
style: 'background: var(--button-generic-background-color);',
- onclick: () => emitEvent('AddTestSuiteClicked', {}),
- })
- : '',
- ),
- ),
+ onclick: () => emitEvent('RunNotificationsClicked', {}),
+ }),
+ Button({
+ icon: 'today',
+ type: 'stroked',
+ label: 'Schedules',
+ tooltip: 'Manage when test suites should run',
+ tooltipPosition: 'bottom',
+ width: 'fit-content',
+ style: 'background: var(--button-generic-background-color);',
+ onclick: () => emitEvent('RunSchedulesClicked', {}),
+ }),
+ userCanEdit
+ ? Button({
+ icon: 'add',
+ type: 'stroked',
+ label: 'Add Test Suite',
+ width: 'fit-content',
+ style: 'background: var(--button-generic-background-color);',
+ onclick: () => emitEvent('AddTestSuiteClicked', {}),
+ })
+ : '',
+ ),
+ );
+ },
() => getValue(testSuites)?.length
? div(
{ class: 'flex-column' },
@@ -140,7 +168,7 @@ const TestSuites = (/** @type Properties */ props) => {
{ class: 'flex-column' },
Link({
href: 'test-suites:definitions',
- params: { test_suite_id: testSuite.id },
+ params: { test_suite_id: testSuite.id, project_code: projectSummary.project_code },
label: `View ${testSuite.test_ct ?? 0} test definitions`,
right_icon: 'chevron_right',
right_icon_size: 20,
@@ -156,7 +184,7 @@ const TestSuites = (/** @type Properties */ props) => {
? [
Link({
href: 'test-runs:results',
- params: { run_id: testSuite.latest_run_id },
+ params: { run_id: testSuite.latest_run_id, project_code: projectSummary.project_code },
label: formatTimestamp(testSuite.latest_run_start),
class: 'mb-4',
}),
diff --git a/testgen/ui/components/frontend/js/streamlit.js b/testgen/ui/components/frontend/js/streamlit.js
index a30ace8c..5b90454c 100644
--- a/testgen/ui/components/frontend/js/streamlit.js
+++ b/testgen/ui/components/frontend/js/streamlit.js
@@ -7,14 +7,16 @@ const Streamlit = {
enableV2(handler) {
this._v2 = true;
this._customSendDataHandler = handler;
+ window.testgen = window.testgen || {};
+ window.testgen.isPage = true;
},
setFrameHeight(height) {
- if (!this._v2) {
+ if (!this || !this._v2) {
sendMessageToStreamlit('streamlit:setFrameHeight', { height: height });
}
},
sendData(data) {
- if (this._v2) {
+ if (this && this._v2) {
const event = data.event;
const triggerData = Object.fromEntries(Object.entries(data).filter(([k, v]) => k !== 'event'));
this._customSendDataHandler(event, triggerData);
diff --git a/testgen/ui/components/frontend/js/utils.js b/testgen/ui/components/frontend/js/utils.js
index 5dc5560f..d71d6ece 100644
--- a/testgen/ui/components/frontend/js/utils.js
+++ b/testgen/ui/components/frontend/js/utils.js
@@ -78,7 +78,7 @@ const stateProto = Object.getPrototypeOf(van.state());
/**
* Get value from van.state
* @template T
- * @param {T} prop
+ * @param {(import('./van.min.js').VanState | T)} prop
* @returns {T}
*/
function getValue(prop) { // van state or static value
diff --git a/testgen/ui/components/frontend/standalone/.gitkeep b/testgen/ui/components/frontend/standalone/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/testgen/ui/components/frontend/standalone/project_settings/index.js b/testgen/ui/components/frontend/standalone/project_settings/index.js
new file mode 100644
index 00000000..fa88c954
--- /dev/null
+++ b/testgen/ui/components/frontend/standalone/project_settings/index.js
@@ -0,0 +1,165 @@
+/**
+ * @import {VanState} from '/app/static/js/van.min.js';
+ */
+import van from '/app/static/js/van.min.js';
+import { Streamlit } from '/app/static/js/streamlit.js';
+import { Card } from '/app/static/js/components/card.js';
+import { Input } from '/app/static/js/components/input.js';
+import { Button } from '/app/static/js/components/button.js';
+import { required } from '/app/static/js/form_validators.js';
+import { Alert } from '/app/static/js/components/alert.js';
+import { emitEvent, getValue, isEqual } from '/app/static/js/utils.js';
+
+const { div, span } = van.tags;
+
+/**
+ * @typedef ObsTestResults
+ * @type {object}
+ * @property {boolean} successful
+ * @property {string} message
+ * @property {string?} details
+ *
+ * @typedef Properties
+ * @type {object}
+ * @property {VanState} name
+ * @property {VanState} observability_api_url
+ * @property {VanState} observability_api_key
+ * @property {VanState} observability_test_results
+ *
+ * @param {Properties} props
+ */
+const ProjectSettings = (props) => {
+ const /** @type Properties */ form = {
+ name: van.state(props.name.rawVal ?? ''),
+ observability_api_key: van.state(props.observability_api_key.rawVal ?? ''),
+ observability_api_url: van.state(props.observability_api_url.rawVal ?? ''),
+ };
+ const formValidity = {
+ name: van.state(!!form.name.rawVal),
+ observability_api_key: van.state(true),
+ observability_api_url: van.state(true),
+ };
+ const saveDisabled = van.derive(() => !formValidity.name.val || !formValidity.observability_api_url.val || !formValidity.observability_api_key.val);
+ const testObservabilityDisabled = van.derive(() => form.observability_api_url.val.length <= 0 || form.observability_api_key.val.length <= 0);
+
+ return div(
+ { class: 'flex-column fx-gap-3' },
+ div(
+ { class: 'flex-column fx-gap-1' },
+ span({ class: 'body m' }, 'Project Info'),
+ Card({
+ class: 'mb-0',
+ border: true,
+ content: div(
+ { class: 'flex-column fx-gap-3'},
+ Input({
+ label: 'Project Name',
+ value: form.name,
+ validators: [ required ],
+ onChange: (value, validity) => {
+ form.name.val = value;
+ formValidity.name.val = validity.valid;
+ },
+ }),
+ ),
+ }),
+ ),
+ div(
+ { class: 'flex-column fx-gap-1' },
+ span({ class: 'body m' }, 'Observability Integration'),
+ Card({
+ class: 'mb-0',
+ border: true,
+ content: div(
+ { class: 'flex-column fx-gap-3'},
+ Input({
+ label: 'API URL',
+ value: form.observability_api_url,
+ onChange: (value, validity) => {
+ form.observability_api_url.val = value;
+ formValidity.observability_api_url.val = validity.valid;
+ },
+ }),
+ Input({
+ label: 'API Key',
+ value: form.observability_api_key,
+ onChange: (value, validity) => {
+ form.observability_api_key.val = value;
+ formValidity.observability_api_key.val = validity.valid;
+ },
+ }),
+ div(
+ { class: 'flex-row' },
+ Button({
+ type: 'stroked',
+ color: 'basic',
+ label: 'Test Observability Connection',
+ width: 'auto',
+ disabled: testObservabilityDisabled,
+ onclick: () => emitEvent('TestObservabilityClicked', {
+ payload: {
+ observability_api_url: form.observability_api_url.rawVal,
+ observability_api_key: form.observability_api_key.rawVal,
+ },
+ }),
+ }),
+ ),
+ () => {
+ const results = getValue(props.observability_test_results) ?? {};
+ return Object.keys(results).length > 0
+ ? Alert(
+ { type: results.successful ? 'success' : 'error' },
+ div(
+ { class: 'flex-column' },
+ span(results.message),
+ results.details ? span(results.details) : '',
+ ),
+ )
+ : '';
+ },
+ ),
+ }),
+ ),
+ div(
+ { class: 'flex-row fx-justify-content-flex-end' },
+ Button({
+ type: 'stroked',
+ color: 'primary',
+ label: 'Save',
+ width: 'auto',
+ disabled: saveDisabled,
+ onclick: () => emitEvent('SaveClicked', {
+ payload: Object.fromEntries(Object.entries(form).map(([fieldName, value]) => [fieldName, value.rawVal]))
+ }),
+ }),
+ ),
+ );
+};
+
+export default (component) => {
+ const { data, setStateValue, setTriggerValue, parentElement } = component;
+
+ Streamlit.enableV2(setTriggerValue);
+
+ let componentState = parentElement.state;
+ if (componentState === undefined) {
+ componentState = {};
+ for (const [ key, value ] of Object.entries(data)) {
+ componentState[key] = van.state(value);
+ }
+
+ parentElement.state = componentState;
+ van.add(parentElement, ProjectSettings(componentState));
+ } else {
+ for (const [ key, value ] of Object.entries(data)) {
+ if (!isEqual(componentState[key].val, value)) {
+ componentState[key].val = value;
+ }
+ }
+ }
+
+ return () => {
+ Streamlit.disableV2(setTriggerValue);
+ parentElement.state = null;
+ };
+};
diff --git a/testgen/ui/components/utils/component.py b/testgen/ui/components/utils/component.py
index 330a42ac..9a25502b 100644
--- a/testgen/ui/components/utils/component.py
+++ b/testgen/ui/components/utils/component.py
@@ -44,7 +44,7 @@ def _is_change_callback(name: str) -> bool:
def _wrap_handler(key: str | None, callback_name: str | None, callback: Callable | None):
if key and callback_name and callback:
def wrapper():
- component_value = st.session_state[key] or {}
+ component_value = st.session_state.get(key) or {}
trigger_value_name = callback_name.removeprefix("on_").removesuffix("_change")
trigger_value = (component_value.get(trigger_value_name) or {}).get("payload")
return callback(trigger_value)
diff --git a/testgen/ui/components/widgets/__init__.py b/testgen/ui/components/widgets/__init__.py
index 63ff76d7..dbe7a776 100644
--- a/testgen/ui/components/widgets/__init__.py
+++ b/testgen/ui/components/widgets/__init__.py
@@ -53,3 +53,9 @@
js="pages/edit_table_monitors.js",
isolate_styles=False,
))
+
+project_settings = component_v2_wrapped(components_v2.component(
+ name="dataops-testgen.project_settings",
+ js="index.js",
+ isolate_styles=False,
+))
diff --git a/testgen/ui/components/widgets/download_dialog.py b/testgen/ui/components/widgets/download_dialog.py
index 712eeaa0..7e403fd1 100644
--- a/testgen/ui/components/widgets/download_dialog.py
+++ b/testgen/ui/components/widgets/download_dialog.py
@@ -9,6 +9,7 @@
import streamlit as st
from testgen.common import date_service
+from testgen.ui.services.rerun_service import safe_rerun
PROGRESS_UPDATE_TYPE = Callable[[float], None]
@@ -152,7 +153,7 @@ def render_button():
mime=file_type,
use_container_width=True,
):
- st.rerun()
+ safe_rerun()
with button_col:
render_button()
diff --git a/testgen/ui/components/widgets/page.py b/testgen/ui/components/widgets/page.py
index c6c68148..b85c8fdf 100644
--- a/testgen/ui/components/widgets/page.py
+++ b/testgen/ui/components/widgets/page.py
@@ -6,10 +6,11 @@
from testgen.ui.components.widgets.breadcrumbs import Breadcrumb
from testgen.ui.components.widgets.breadcrumbs import breadcrumbs as tg_breadcrumbs
from testgen.ui.components.widgets.testgen_component import testgen_component
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.session import session
from testgen.ui.views.dialogs.application_logs_dialog import application_logs_dialog
-UPGRADE_URL = "https://docs.datakitchen.io/articles/dataops-testgen-help/upgrade-testgen"
+UPGRADE_URL = "https://docs.datakitchen.io/testgen/administer/upgrade-testgen/"
def page_header(
@@ -47,7 +48,7 @@ def close_help(rerun: bool = False) -> None:
flex_row_end()
st.markdown("Help :material/keyboard_arrow_down:")
if rerun:
- st.rerun()
+ safe_rerun()
def open_app_logs():
close_help()
diff --git a/testgen/ui/components/widgets/sidebar.py b/testgen/ui/components/widgets/sidebar.py
index ec7c0ab2..f847dac0 100644
--- a/testgen/ui/components/widgets/sidebar.py
+++ b/testgen/ui/components/widgets/sidebar.py
@@ -2,6 +2,7 @@
import time
from collections.abc import Iterable
+from testgen.common.models import with_database_session
from testgen.common.models.project import Project
from testgen.common.version_service import Version
from testgen.ui.components.utils.component import component
@@ -23,6 +24,8 @@ def sidebar(
current_page: str | None = None,
version: Version | None = None,
support_email: str | None = None,
+ global_context: bool = False,
+ is_global_admin: bool = False,
) -> None:
"""
Testgen custom component to display a styled menu over streamlit's
@@ -33,6 +36,7 @@ def sidebar(
:param username: username to display at the bottom of the menu
:param menu: menu object with all root pages
:param current_page: page address to highlight the selected item
+ :param global_context: when True, renders admin-only sidebar (no project nav)
"""
component(
id_="sidebar",
@@ -42,16 +46,19 @@ def sidebar(
"menu": menu.filter_for_current_user().sort_items().unflatten().asdict(),
"current_page": current_page,
"username": session.auth.user_display,
- "role": session.auth.user.role if session.auth.user else None,
+ "role": "" if global_context else (session.auth.role or "-"),
"logout_path": LOGOUT_PATH,
"version": version.__dict__,
"support_email": support_email,
+ "global_context": global_context,
+ "is_global_admin": is_global_admin,
},
key=key,
on_change=on_change,
)
+@with_database_session
def on_change():
# We cannot navigate directly here
# because st.switch_page uses st.rerun under the hood
@@ -80,7 +87,8 @@ def on_change():
# (even though it works fine locally)
time.sleep(0.3)
else:
+ query_params = event_data.get("params", {})
Router().queue_navigation(
- to=event_data.get("path") or session.auth.default_page,
- with_args=event_data.get("params", {}),
+ to=event_data.get("path") or session.auth.get_default_page(project_code=query_params.get("project_code")),
+ with_args=query_params,
)
diff --git a/testgen/ui/components/widgets/testgen_component.py b/testgen/ui/components/widgets/testgen_component.py
index 8161a0b7..93dbe523 100644
--- a/testgen/ui/components/widgets/testgen_component.py
+++ b/testgen/ui/components/widgets/testgen_component.py
@@ -22,6 +22,7 @@
"table_group_wizard",
"help_menu",
"notification_settings",
+ "import_metadata_dialog",
]
diff --git a/testgen/ui/components/widgets/wizard.py b/testgen/ui/components/widgets/wizard.py
index 1b87da1e..31baeaa3 100644
--- a/testgen/ui/components/widgets/wizard.py
+++ b/testgen/ui/components/widgets/wizard.py
@@ -8,6 +8,7 @@
from testgen.ui.components import widgets as testgen
from testgen.ui.navigation.router import Router
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.session import temp_value
ResultsType = typing.TypeVar("ResultsType", bound=typing.Any | None)
@@ -175,7 +176,7 @@ def complete(self, container: DeltaGenerator) -> None:
do_rerun = self._on_complete(**kwargs)
self._reset()
if do_rerun:
- st.rerun()
+ safe_rerun()
def _reset(self) -> None:
del st.session_state[self._key]
diff --git a/testgen/ui/forms.py b/testgen/ui/forms.py
deleted file mode 100644
index 8ed6762f..00000000
--- a/testgen/ui/forms.py
+++ /dev/null
@@ -1,136 +0,0 @@
-from collections.abc import Callable, Generator
-from typing import Any, cast
-
-import streamlit as st
-from pydantic import BaseModel, Extra, Field # noqa: F401
-from pydantic.fields import FieldInfo
-from pydantic.schema import default_ref_template
-from streamlit.delta_generator import DeltaGenerator
-from streamlit_pydantic.ui_renderer import InputUI
-
-
-class BaseForm(BaseModel):
- @classmethod
- def empty(cls) -> "BaseForm":
- return cls.construct()
-
- @property
- def _disabled_fields(self) -> set[str]:
- if not getattr(self, "_disabled_fields_set", None):
- self._disabled_fields_set = set()
- return self._disabled_fields_set
-
- def disable(self, field: str) -> None:
- self._disabled_fields.add(field)
-
- def enable(self, field) -> None:
- self._disabled_fields.remove(field)
-
- @classmethod
- def schema(self_or_cls, by_alias: bool = True, ref_template: str = default_ref_template) -> dict[str, Any]:
- schema = super().schema(by_alias=by_alias, ref_template=ref_template)
-
- schema_properties: dict[str, dict] = schema.get("properties", {})
- disabled_fields: set[str] = getattr(self_or_cls, "_disabled_fields_set", set())
- for property_name, property_schema in schema_properties.items():
- if property_name in disabled_fields and not property_schema.get("readOnly"):
- property_schema["readOnly"] = True
-
- return schema
-
- @classmethod
- def get_field_label(cls, field_name: str) -> str:
- schema = cls.schema()
- schema_properties = schema.get("properties", {})
- field_schema = schema_properties[field_name]
- return field_schema.get("st_kwargs_label") or field_schema.get("title")
-
- def _iter(self, *args, **kwargs) -> Generator[tuple[str, Any], None, None]:
- """
- NOTE: can be removed in favor of `@computed_field` if
- streamlit-pydantic is ever updated to use pydantic 2.0.
- """
-
- for dict_key, value in super()._iter(*args, **kwargs):
- field_descriptor = self.__fields__.get(dict_key)
- is_computed_field = (
- field_descriptor is not None
- and isinstance(field_descriptor.field_info, ComputedField)
- )
- if is_computed_field:
- value = field_descriptor.field_info.get_value(self)
- yield dict_key, value
-
- class Config:
- extra = Extra.allow
- arbitrary_types_allowed = True
-
-
-def computed_field(default=None):
- def decorator(method: Callable) -> ComputedField:
- return ComputedField(method, default=default)
- return decorator
-
-
-class ComputedField(FieldInfo):
- def __init__(self, method: Callable, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self.func = method
-
- def get_value(self, instance: type[BaseForm]):
- return self.func(instance)
-
-
-class ManualRender:
- @property
- def input_ui(self):
- if not getattr(self, "_input_ui", None):
- self._input_ui = InputUI(
- self.form_key(),
- self, # type: ignore
- group_optional_fields="no", # type: ignore
- lowercase_labels=False,
- ignore_empty_values=False,
- )
- return self._input_ui
-
- def form_key(self):
- raise NotImplementedError
-
- def render_input_ui(self, container: DeltaGenerator, session_state: dict) -> "BaseForm":
- raise NotImplementedError
-
- def render_field(self, field_name: str, container: DeltaGenerator | None = None) -> Any:
- streamlit_container = container or self.input_ui._streamlit_container
- model_property = self.input_ui._schema_properties[field_name]
- initial_value = getattr(self, field_name, None) or self.input_ui._get_value(field_name)
- is_disabled = field_name in getattr(self, "_disabled_fields", set())
-
- if is_disabled:
- model_property["readOnly"] = True
-
- if model_property.get("type") != "boolean" and initial_value not in [None, ""]:
- model_property["init_value"] = initial_value
-
- new_value = self.input_ui._render_property(streamlit_container, field_name, model_property)
- self.update_field_value(field_name, new_value)
-
- return new_value
-
- def update_field_value(self, field_name: str, value: Any) -> Any:
- self.input_ui._store_value(field_name, value)
- setattr(self, field_name, value)
- return value
-
- def get_field_value(self, field_name: str, latest: bool = False) -> Any:
- if latest:
- return st.session_state.get(self.get_field_key(field_name))
- return self.input_ui._get_value(field_name)
-
- def reset_cache(self) -> None:
- for field_name in cast(type[BaseForm], type(self)).__fields__.keys():
- st.session_state.pop(self.get_field_key(field_name), None)
- st.session_state.pop(self.form_key() + "-data", None)
-
- def get_field_key(self, field_name: str) -> Any:
- return str(self.input_ui._session_state.run_id) + "-" + str(self.input_ui._key) + "-" + field_name
diff --git a/testgen/ui/navigation/page.py b/testgen/ui/navigation/page.py
index d80bee29..11e93f06 100644
--- a/testgen/ui/navigation/page.py
+++ b/testgen/ui/navigation/page.py
@@ -8,7 +8,6 @@
from streamlit.runtime.state.query_params_proxy import QueryParamsProxy
import testgen.ui.navigation.router
-from testgen.common.models.project import Project
from testgen.ui.auth import Permission
from testgen.ui.navigation.menu import MenuItem
from testgen.ui.session import session
@@ -33,20 +32,37 @@ def __init__(self, router: testgen.ui.navigation.router.Router) -> None:
def _navigate(self) -> None:
self.router.navigate_to_pending()
+
+ is_admin_page = self.permission == "global_admin"
+ requested_project = st.query_params.get("project_code")
+ if not is_admin_page and session.auth.user and requested_project and not session.auth.user_has_project_access(requested_project):
+ default_page = session.auth.get_default_page()
+ project_codes = session.auth.user.get_accessible_projects()
+ return self.router.navigate_with_warning(
+ "You do not have access to this project or it does not exist. Redirecting ...",
+ to=default_page,
+ with_args={"project_code": project_codes[0] if project_codes else None},
+ )
+
+ sidebar_project = session.sidebar_project
+ if not sidebar_project and session.auth.user:
+ project_codes = [requested_project] if requested_project else session.auth.user.get_accessible_projects()
+ sidebar_project = project_codes[0] if project_codes else None
+ session.sidebar_project = sidebar_project
+
permission_guard = lambda: session.auth.user_has_permission(self.permission) if self.permission else True
for guard in [ permission_guard, *(self.can_activate or []) ]:
can_activate = guard()
if can_activate != True:
- session.sidebar_project = session.sidebar_project or Project.select_where()[0].project_code
-
if type(can_activate) == str:
return self.router.navigate(to=can_activate, with_args={ "project_code": session.sidebar_project })
session.page_pending_login = self.path
session.page_args_pending_login = st.query_params.to_dict()
- default_page = session.auth.default_page or ""
+ default_page = session.auth.get_default_page(project_code=session.sidebar_project)
with_args = { "project_code": session.sidebar_project } if default_page else {}
+
return self.router.navigate(to=default_page, with_args=with_args)
self.render(**self._query_params_to_kwargs(st.query_params))
diff --git a/testgen/ui/navigation/router.py b/testgen/ui/navigation/router.py
index bb6ae98d..eaa43a52 100644
--- a/testgen/ui/navigation/router.py
+++ b/testgen/ui/navigation/router.py
@@ -8,7 +8,6 @@
import testgen.ui.navigation.page
from testgen.common.mixpanel_service import MixpanelService
-from testgen.common.models.project import Project
from testgen.common.models.settings import PersistedSetting
from testgen.ui.session import session
from testgen.utils.singleton import Singleton
@@ -54,7 +53,7 @@ def run(self) -> None:
if session.auth.logging_in:
session.auth.logging_in = False
- pending_route = session.page_pending_login or session.auth.default_page or ""
+ pending_route = session.page_pending_login or session.auth.get_default_page(project_code=session.sidebar_project)
pending_args = (
(session.page_args_pending_login or {})
if session.page_pending_login
@@ -133,7 +132,11 @@ def navigate(self, /, to: str, with_args: dict = {}) -> None: # noqa: B006
def navigate_with_warning(self, warning: str, to: str, with_args: dict = {}) -> None: # noqa: B006
st.warning(warning)
time.sleep(3)
- session.sidebar_project = session.sidebar_project or Project.select_where()[0].project_code
+ sidebar_project = session.sidebar_project
+ if session.auth.user and not sidebar_project:
+ project_codes = session.auth.user.get_accessible_projects()
+ sidebar_project = project_codes[0] if project_codes else None
+ session.sidebar_project = sidebar_project
self.navigate(to, {"project_code": session.sidebar_project, **with_args})
def set_query_params(self, with_args: dict) -> None:
diff --git a/testgen/ui/pdf/hygiene_issue_report.py b/testgen/ui/pdf/hygiene_issue_report.py
index df858ec1..58579577 100644
--- a/testgen/ui/pdf/hygiene_issue_report.py
+++ b/testgen/ui/pdf/hygiene_issue_report.py
@@ -118,6 +118,10 @@ def build_summary_table(document, hi_data):
"Critical data element : Yes" if hi_data["critical_data_element"] else "Critical data element : No",
style=PARA_STYLE_CELL,
),
+ Paragraph(
+ "PII : Yes" if hi_data["pii_flag"] else "PII : No",
+ style=PARA_STYLE_CELL,
+ ),
Paragraph(f"Description : {hi_data['column_description']}", style=PARA_STYLE_CELL)
if hi_data["column_description"]
else [],
@@ -139,7 +143,7 @@ def build_summary_table(document, hi_data):
),
(
Paragraph(
- f"""
+ f"""
View on TestGen >
""",
style=PARA_STYLE_LINK,
@@ -178,7 +182,7 @@ def build_sql_query_content(sample_data_tuple):
return Paragraph("No sample data lookup query registered for this issue.")
-def get_report_content(document, hi_data):
+def get_report_content(document, hi_data, mask_pii: bool = False):
yield Paragraph("TestGen Hygiene Issue Report", PARA_STYLE_TITLE)
yield build_summary_table(document, hi_data)
@@ -186,7 +190,7 @@ def get_report_content(document, hi_data):
yield Paragraph("Suggested Action", style=PARA_STYLE_H1)
yield Paragraph(hi_data["suggested_action"], style=PARA_STYLE_TEXT)
- sample_data_tuple = get_hygiene_issue_source_data(hi_data, limit=ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT)
+ sample_data_tuple = get_hygiene_issue_source_data(hi_data, limit=ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT, mask_pii=mask_pii)
yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT)
yield Paragraph("Sample Data", PARA_STYLE_H1)
@@ -198,6 +202,6 @@ def get_report_content(document, hi_data):
])
-def create_report(filename, hi_data):
+def create_report(filename, hi_data, mask_pii: bool = False):
doc = DatakitchenTemplate(filename)
- doc.build(flowables=list(get_report_content(doc, hi_data)))
+ doc.build(flowables=list(get_report_content(doc, hi_data, mask_pii=mask_pii)))
diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py
index 50f79b55..e6ce17f0 100644
--- a/testgen/ui/pdf/test_result_report.py
+++ b/testgen/ui/pdf/test_result_report.py
@@ -28,7 +28,10 @@
get_formatted_datetime,
)
from testgen.ui.pdf.templates import DatakitchenTemplate
-from testgen.ui.queries.source_data_queries import get_test_issue_source_data, get_test_issue_source_data_custom
+from testgen.ui.queries.source_data_queries import (
+ get_test_issue_source_data,
+ get_test_issue_source_data_custom,
+)
from testgen.ui.queries.test_result_queries import (
get_test_result_history,
)
@@ -131,6 +134,10 @@ def build_summary_table(document, tr_data):
"Critical data element : Yes" if tr_data["critical_data_element"] else "Critical data element : No",
style=PARA_STYLE_CELL,
),
+ Paragraph(
+ "PII : Yes" if tr_data["pii_flag"] else "PII : No",
+ style=PARA_STYLE_CELL,
+ ),
Paragraph(f"Description : {tr_data['column_description']}", style=PARA_STYLE_CELL)
if tr_data["column_description"]
else [],
@@ -152,7 +159,7 @@ def build_summary_table(document, tr_data):
),
(
Paragraph(
- f"""
+ f"""
View on TestGen >
""",
style=PARA_STYLE_LINK,
@@ -227,7 +234,7 @@ def build_sql_query_content(sample_data_tuple):
return Paragraph("No sample data lookup query registered for this test.")
-def get_report_content(document, tr_data):
+def get_report_content(document, tr_data, mask_pii: bool = False):
yield Paragraph("TestGen Test Issue Report", PARA_STYLE_TITLE)
yield build_summary_table(document, tr_data)
@@ -242,9 +249,9 @@ def get_report_content(document, tr_data):
yield build_history_table(document, tr_data)
if tr_data["test_type"] == "CUSTOM":
- sample_data_tuple = get_test_issue_source_data_custom(tr_data, limit=ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT)
+ sample_data_tuple = get_test_issue_source_data_custom(tr_data, limit=ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT, mask_pii=mask_pii)
else:
- sample_data_tuple = get_test_issue_source_data(tr_data, limit=ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT)
+ sample_data_tuple = get_test_issue_source_data(tr_data, limit=ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT, mask_pii=mask_pii)
yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT)
yield Paragraph("Sample Data", PARA_STYLE_H1)
@@ -256,6 +263,6 @@ def get_report_content(document, tr_data):
])
-def create_report(filename, tr_data):
+def create_report(filename, tr_data, mask_pii: bool = False):
doc = DatakitchenTemplate(filename)
- doc.build(flowables=list(get_report_content(doc, tr_data)))
+ doc.build(flowables=list(get_report_content(doc, tr_data, mask_pii=mask_pii)))
diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py
index 14f34b13..a0cb7873 100644
--- a/testgen/ui/queries/profiling_queries.py
+++ b/testgen/ui/queries/profiling_queries.py
@@ -80,7 +80,7 @@ def get_profiling_results(profiling_run_id: str, table_name: str | None = None,
query = f"""
SELECT
- id::VARCHAR,
+ profile_results.id::VARCHAR,
'column' AS type,
schema_name,
table_name,
@@ -107,8 +107,11 @@ def get_profiling_results(profiling_run_id: str, table_name: str | None = None,
AND table_name = profile_results.table_name
AND column_name = profile_results.column_name
) THEN 'Yes' END AS hygiene_issues,
- CASE WHEN query_error IS NOT NULL THEN 'Error: ' || query_error ELSE NULL END AS result_details
+ CASE WHEN query_error IS NOT NULL THEN 'Error: ' || query_error ELSE NULL END AS result_details,
+ tg.project_code,
+ tg.connection_id::VARCHAR AS connection_id
FROM profile_results
+ LEFT JOIN table_groups tg ON (profile_results.table_groups_id = tg.id)
WHERE profile_run_id = :profiling_run_id
AND table_name ILIKE :table_name
AND column_name ILIKE :column_name
@@ -242,16 +245,16 @@ def get_tables_by_condition(
-- Profile Run
table_chars.last_complete_profile_run_id::VARCHAR AS profile_run_id,
profiling_starttime AS profile_run_date,
- TRUE AS is_latest_profile
+ TRUE AS is_latest_profile,
+ table_groups.project_code,
+ table_groups.connection_id::VARCHAR AS connection_id
FROM data_table_chars table_chars
LEFT JOIN profiling_runs ON (
table_chars.last_complete_profile_run_id = profiling_runs.id
)
- {"""
LEFT JOIN table_groups ON (
table_chars.table_groups_id = table_groups.id
)
- """ if include_tags else ""}
{"""
LEFT JOIN active_test_definitions active_tests ON (
table_chars.table_groups_id = active_tests.table_groups_id
@@ -365,6 +368,8 @@ def get_columns_by_condition(
-- Column Tags
column_chars.description,
column_chars.critical_data_element,
+ column_chars.excluded_data_element,
+ column_chars.pii_flag,
{", ".join([ f"column_chars.{tag}" for tag in TAG_FIELDS ])},
-- Table Tags
table_chars.critical_data_element AS table_critical_data_element,
@@ -404,16 +409,16 @@ def get_columns_by_condition(
column_chars.dq_score_testing,
""" if include_scores else ""}
table_chars.approx_record_ct,
+ table_groups.project_code,
+ table_groups.connection_id::VARCHAR AS connection_id,
{COLUMN_PROFILING_FIELDS}
FROM data_column_chars column_chars
LEFT JOIN data_table_chars table_chars ON (
column_chars.table_id = table_chars.table_id
)
- {"""
LEFT JOIN table_groups ON (
column_chars.table_groups_id = table_groups.id
)
- """ if include_tags else ""}
LEFT JOIN profile_results ON (
column_chars.last_complete_profile_run_id = profile_results.profile_run_id
AND column_chars.schema_name = profile_results.schema_name
@@ -446,6 +451,7 @@ def get_hygiene_issues(profile_run_id: str, table_name: str, column_name: str |
anomaly_name,
issue_likelihood,
detail,
+ detail_redactable,
pii_risk
FROM profile_anomaly_results anomaly_results
LEFT JOIN profile_anomaly_types anomaly_types ON (
@@ -514,14 +520,15 @@ def get_profiling_anomalies(
WHEN t.issue_likelihood = 'Likely' THEN 2
WHEN t.issue_likelihood = 'Definite' THEN 1
END AS likelihood_order,
- t.anomaly_description, r.detail, t.suggested_action,
+ t.anomaly_description, r.detail, t.detail_redactable, t.suggested_action,
r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime, r.profile_run_id::VARCHAR,
- tg.table_groups_name,
+ tg.table_groups_name, tg.project_code,
-- These are used in the PDF report
dcc.functional_data_type,
dcc.description as column_description,
COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element,
+ dcc.pii_flag,
COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source,
COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system,
COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process,
diff --git a/testgen/ui/queries/scoring_queries.py b/testgen/ui/queries/scoring_queries.py
index f8d78bdd..9a892369 100644
--- a/testgen/ui/queries/scoring_queries.py
+++ b/testgen/ui/queries/scoring_queries.py
@@ -33,6 +33,7 @@ def get_score_card_issue_reports(selected_issues: list["SelectedIssue"]) -> list
types.anomaly_name,
types.anomaly_description,
results.detail,
+ types.detail_redactable,
results.schema_name,
results.table_name,
results.column_name,
@@ -42,10 +43,12 @@ def get_score_card_issue_reports(selected_issues: list["SelectedIssue"]) -> list
results.profile_run_id::VARCHAR,
types.suggested_action,
results.table_groups_id::VARCHAR,
+ results.project_code,
results.anomaly_id::VARCHAR,
column_chars.functional_data_type,
column_chars.description as column_description,
COALESCE(column_chars.critical_data_element, table_chars.critical_data_element) as critical_data_element,
+ column_chars.pii_flag,
COALESCE(column_chars.data_source, table_chars.data_source, groups.data_source) as data_source,
COALESCE(column_chars.source_system, table_chars.source_system, groups.source_system) as source_system,
COALESCE(column_chars.source_process, table_chars.source_process, groups.source_process) as source_process,
@@ -90,6 +93,8 @@ def get_score_card_issue_reports(selected_issues: list["SelectedIssue"]) -> list
results.schema_name,
results.table_name,
results.column_names,
+ column_chars.column_type,
+ results.result_message,
groups.table_groups_name,
suites.test_suite,
types.dq_dimension,
@@ -104,9 +109,11 @@ def get_score_card_issue_reports(selected_issues: list["SelectedIssue"]) -> list
results.test_suite_id,
results.test_definition_id::VARCHAR,
results.table_groups_id::VARCHAR,
+ groups.project_code,
types.id::VARCHAR AS test_type_id,
column_chars.description as column_description,
COALESCE(column_chars.critical_data_element, table_chars.critical_data_element) as critical_data_element,
+ column_chars.pii_flag,
COALESCE(column_chars.data_source, table_chars.data_source, groups.data_source) as data_source,
COALESCE(column_chars.source_system, table_chars.source_system, groups.source_system) as source_system,
COALESCE(column_chars.source_process, table_chars.source_process, groups.source_process) as source_process,
diff --git a/testgen/ui/queries/source_data_queries.py b/testgen/ui/queries/source_data_queries.py
index d1537023..48b307ff 100644
--- a/testgen/ui/queries/source_data_queries.py
+++ b/testgen/ui/queries/source_data_queries.py
@@ -9,10 +9,11 @@
from testgen.common.database.database_service import get_flavor_service, replace_params
from testgen.common.models.connection import Connection, SQLFlavor
from testgen.common.models.test_definition import TestDefinition
+from testgen.common.pii_masking import PII_REDACTED, get_pii_columns, mask_source_data_pii
from testgen.common.read_file import replace_templated_functions
from testgen.ui.services.database_service import fetch_from_target_db, fetch_one_from_db
from testgen.ui.utils import parse_fuzzy_date
-from testgen.utils import to_dataframe
+from testgen.utils import to_dataframe, to_sql_timestamp
LOG = logging.getLogger("testgen")
DEFAULT_LIMIT = 500
@@ -78,6 +79,7 @@ def generate_lookup_query(test_id: str, detail_exp: str, column_names: list[str]
def get_hygiene_issue_source_data(
issue_data: dict,
limit: int = DEFAULT_LIMIT,
+ mask_pii: bool = False,
) -> tuple[Literal["OK"], None, str, pd.DataFrame] | tuple[Literal["NA", "ND", "ERR"], str, str | None, None]:
lookup_query = None
try:
@@ -92,6 +94,15 @@ def get_hygiene_issue_source_data(
df = to_dataframe(results)
if limit:
df = df.sample(n=min(len(df), limit)).sort_index()
+ if mask_pii:
+ _mask_lookup_pii(
+ df,
+ issue_data["table_groups_id"],
+ issue_data["table_name"],
+ column_name=issue_data.get("column_name"),
+ test_type_id=issue_data.get("anomaly_id"),
+ error_type="Profile Anomaly",
+ )
return "OK", None, lookup_query, df
else:
return (
@@ -119,7 +130,7 @@ def get_test_issue_source_query(issue_data: dict, limit: int = DEFAULT_LIMIT) ->
"TABLE_NAME": issue_data["table_name"],
"COLUMN_NAME": issue_data["column_names"], # Don't quote this - queries already have quotes
"COLUMN_TYPE": issue_data["column_type"],
- "TEST_DATE": str(parsed_test_date) if (parsed_test_date := parse_fuzzy_date(issue_data["test_date"]))
+ "TEST_DATE": to_sql_timestamp(parsed_test_date) if (parsed_test_date := parse_fuzzy_date(issue_data["test_date"]))
else None,
"CUSTOM_QUERY": test_definition.custom_query,
"BASELINE_VALUE": test_definition.baseline_value,
@@ -159,6 +170,7 @@ def get_test_issue_source_query(issue_data: dict, limit: int = DEFAULT_LIMIT) ->
def get_test_issue_source_data(
issue_data: dict,
limit: int = DEFAULT_LIMIT,
+ mask_pii: bool = False,
) -> tuple[Literal["OK"], None, str, pd.DataFrame] | tuple[Literal["NA", "ND", "ERR"], str, str | None, None]:
lookup_query = None
try:
@@ -177,6 +189,15 @@ def get_test_issue_source_data(
df = to_dataframe(results)
if limit:
df = df.sample(n=min(len(df), limit)).sort_index()
+ if mask_pii:
+ _mask_lookup_pii(
+ df,
+ issue_data["table_groups_id"],
+ issue_data["table_name"],
+ column_name=issue_data.get("column_names"),
+ test_type_id=issue_data.get("test_type_id"),
+ error_type="Test Results",
+ )
return "OK", None, lookup_query, df
else:
return "ND", "Data that violates test criteria is not present in the current dataset.", lookup_query, None
@@ -203,6 +224,7 @@ def get_test_issue_source_query_custom(
def get_test_issue_source_data_custom(
issue_data: dict,
limit: int | None = None,
+ mask_pii: bool = False,
) -> tuple[Literal["OK"], None, str, pd.DataFrame] | tuple[Literal["NA", "ND", "ERR"], str, str | None, None]:
try:
test_definition = TestDefinition.get(issue_data["test_definition_id"])
@@ -220,6 +242,17 @@ def get_test_issue_source_data_custom(
df = to_dataframe(results)
if limit:
df = df.sample(n=min(len(df), limit)).sort_index()
+ if mask_pii:
+ _mask_lookup_pii(
+ df,
+ issue_data["table_groups_id"],
+ issue_data["table_name"],
+ )
+ # Mask user-defined redactable columns from the test definition
+ lookup_data = _get_lookup_data_custom(issue_data["test_definition_id"])
+ if lookup_data and lookup_data.lookup_redactable_columns:
+ redactable = {col.strip() for col in lookup_data.lookup_redactable_columns.split(",")}
+ mask_source_data_pii(df, redactable)
return "OK", None, lookup_query, df
else:
return "ND", "Data that violates test criteria is not present in the current dataset.", lookup_query, None
@@ -232,6 +265,50 @@ def get_test_issue_source_data_custom(
class LookupData:
lookup_query: str
sql_flavor: SQLFlavor | None = None
+ lookup_redactable_columns: str | None = None
+
+
+def _mask_lookup_pii(
+ df: pd.DataFrame,
+ table_group_id: str,
+ table_name: str,
+ column_name: str | None = None,
+ test_type_id: str | None = None,
+ error_type: Literal["Profile Anomaly", "Test Results"] | None = None,
+) -> None:
+ """Apply PII masking to a source data lookup DataFrame."""
+ pii_columns = get_pii_columns(table_group_id, table_name=table_name)
+ mask_source_data_pii(df, pii_columns)
+
+ # Row-level masking: if result has a column_name column listing which source column
+ # each row is about (e.g., table-level recency queries), mask value columns in rows
+ # where that source column is PII
+ if pii_columns and "column_name" in df.columns:
+ pii_lower = {c.lower() for c in pii_columns}
+ value_cols = [c for c in df.columns if c != "column_name"]
+ pii_rows = df["column_name"].str.lower().isin(pii_lower)
+ for col in value_cols:
+ if df[col].dtype != object:
+ df[col] = df[col].astype(object)
+ df.loc[pii_rows, col] = PII_REDACTED
+
+ # Also mask redactable columns if the test's target column is PII
+ if column_name and test_type_id and error_type and column_name.lower() in {c.lower() for c in pii_columns}:
+ result = fetch_one_from_db(
+ """
+ SELECT t.lookup_redactable_columns
+ FROM target_data_lookups t
+ INNER JOIN table_groups tg ON (:table_group_id = tg.id)
+ INNER JOIN connections c ON (tg.connection_id = c.connection_id AND t.sql_flavor = c.sql_flavor)
+ WHERE t.error_type = :error_type
+ AND t.test_id = :test_type_id
+ AND t.lookup_redactable_columns IS NOT NULL;
+ """,
+ {"table_group_id": table_group_id, "error_type": error_type, "test_type_id": test_type_id},
+ )
+ if result and result["lookup_redactable_columns"]:
+ redactable = {col.strip() for col in result["lookup_redactable_columns"].split(",")}
+ mask_source_data_pii(df, redactable)
def _get_lookup_data(
@@ -243,7 +320,8 @@ def _get_lookup_data(
"""
SELECT
t.lookup_query,
- c.sql_flavor
+ c.sql_flavor,
+ t.lookup_redactable_columns
FROM target_data_lookups t
INNER JOIN table_groups tg
ON (:table_group_id = tg.id)
@@ -269,7 +347,8 @@ def _get_lookup_data_custom(
result = fetch_one_from_db(
"""
SELECT
- d.custom_query as lookup_query
+ d.custom_query as lookup_query,
+ d.match_column_names as lookup_redactable_columns
FROM test_definitions d
WHERE d.id = :test_definition_id;
""",
diff --git a/testgen/ui/queries/test_result_queries.py b/testgen/ui/queries/test_result_queries.py
index ad35a8b4..7c73df03 100644
--- a/testgen/ui/queries/test_result_queries.py
+++ b/testgen/ui/queries/test_result_queries.py
@@ -15,6 +15,7 @@ def get_test_results(
column_name: str | None = None,
action: Literal["Confirmed", "Dismissed", "Muted", "No Action"] | None = None,
sorting_columns: list[str] | None = None,
+ flagged: bool | None = None,
) -> pd.DataFrame:
query = f"""
WITH run_results
@@ -59,12 +60,15 @@ def get_test_results(
c.id::VARCHAR as connection_id, r.test_suite_id::VARCHAR,
r.test_definition_id::VARCHAR,
r.auto_gen,
+ td.flagged,
+ (SELECT COUNT(*) FROM test_definition_notes tdn WHERE tdn.test_definition_id = td.id) as notes_count,
-- These are used in the PDF report
tt.threshold_description, tt.usage_notes, r.test_time,
dcc.description as column_description,
dcc.column_type as column_type,
COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element,
+ dcc.pii_flag,
COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source,
COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system,
COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process,
@@ -94,6 +98,9 @@ def get_test_results(
AND r.column_names = dcc.column_name)
LEFT JOIN data_table_chars dtc
ON dcc.table_id = dtc.table_id
+ LEFT JOIN test_definitions td
+ ON (r.test_definition_id = td.id)
+ {"WHERE td.flagged = :flagged" if flagged is not None else ""}
{f"ORDER BY {', '.join(' '.join(col) for col in sorting_columns)}" if sorting_columns else ""};
"""
params = {
@@ -105,10 +112,12 @@ def get_test_results(
"disposition": {
"Muted": "Inactive",
}.get(action, action),
+ "flagged": flagged,
}
df = fetch_df_from_db(query, params)
df["test_date"] = pd.to_datetime(df["test_date"])
+ df["flagged_display"] = df["flagged"].apply(lambda value: "Yes" if value else "No")
return df
diff --git a/testgen/ui/services/database_service.py b/testgen/ui/services/database_service.py
index cf5c7280..8877a423 100644
--- a/testgen/ui/services/database_service.py
+++ b/testgen/ui/services/database_service.py
@@ -11,11 +11,12 @@
from typing import Any
-from sqlalchemy import create_engine, text
+from sqlalchemy import text
from sqlalchemy.engine import Row, RowMapping
from sqlalchemy.engine.cursor import CursorResult
from testgen.common.database.database_service import get_flavor_service
+from testgen.common.database.flavor.flavor_service import resolve_connection_params
from testgen.common.models import get_current_session
@@ -54,15 +55,13 @@ def fetch_one_from_db(query: str, params: dict | None = None) -> RowMapping | No
def fetch_from_target_db(connection: Connection, query: str, params: dict | None = None) -> list[Row]:
+ connection_params = connection.to_dict()
flavor_service = get_flavor_service(connection.sql_flavor)
- flavor_service.init(connection.to_dict())
+ resolved = resolve_connection_params(connection_params)
+ engine = flavor_service.create_engine(connection_params)
- engine = create_engine(
- flavor_service.get_connection_string(),
- connect_args=flavor_service.get_connect_args(),
- **flavor_service.get_engine_args(),
- )
-
- with engine.connect() as connection:
- cursor: CursorResult = connection.execute(text(query), params)
+ with engine.connect() as conn:
+ for pre_query, pre_params in flavor_service.get_pre_connection_queries(resolved):
+ conn.execute(text(pre_query), pre_params)
+ cursor: CursorResult = conn.execute(text(query), params)
return cursor.fetchall()
diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py
index 70e8f752..948d65a1 100644
--- a/testgen/ui/services/form_service.py
+++ b/testgen/ui/services/form_service.py
@@ -11,6 +11,7 @@
from testgen.ui.components import widgets as testgen
from testgen.ui.navigation.router import Router
+from testgen.ui.services.rerun_service import safe_rerun
"""
Shared rendering of UI elements
@@ -24,7 +25,7 @@ def render_refresh_button(button_container):
with button_container:
do_refresh = st.button(":material/refresh:", help="Refresh page data", use_container_width=False)
if do_refresh:
- reset_post_updates("Refreshing page", True, True)
+ reset_post_updates("Refreshing page", as_toast=True)
def show_prompt(str_prompt=None):
@@ -61,7 +62,7 @@ def ut_prettify_header(str_header, expand=False):
return str_new
-def reset_post_updates(str_message=None, as_toast=False, clear_cache=True, lst_cached_functions=None, style="success"):
+def reset_post_updates(str_message=None, as_toast=False, style="success"):
if str_message:
if as_toast:
st.toast(str_message)
@@ -71,13 +72,7 @@ def reset_post_updates(str_message=None, as_toast=False, clear_cache=True, lst_c
st.success(str_message)
sleep(1.5)
- if clear_cache:
- if lst_cached_functions:
- for fcn in lst_cached_functions:
- fcn.clear()
- else:
- st.cache_data.clear()
- st.rerun()
+ safe_rerun()
def render_html_list(dct_row, lst_columns, str_section_header=None, int_data_width=300, lst_labels=None):
@@ -151,6 +146,7 @@ def render_grid_select(
reset_pagination: bool = False,
bind_to_query: bool = False,
render_highlights: bool = True,
+ column_styles: dict[str, dict] | None = None,
key: str = "aggrid",
) -> tuple[list[dict], dict]:
"""
@@ -343,6 +339,8 @@ def on_page_change():
# Merge common and date-time specific kwargs
all_kwargs = {**common_kwargs, **date_time_kwargs}
+ elif column_styles and column in column_styles:
+ all_kwargs = {**common_kwargs, "cellStyle": column_styles[column]}
else:
if render_highlights == True:
# Merge common and highlight-specific kwargs
@@ -394,14 +392,14 @@ def on_page_change():
selection.update([row[id_column] for row in selected_rows])
st.session_state[f"{key}_multiselection"] = selection
- if selection:
+ if selection:
# We need to get the data from the original dataframe
# Otherwise changes to the dataframe (e.g., editing the current selection) do not get reflected in the returned rows
# Adding "modelUpdated" to AgGrid(update_on=...) does not work
# because it causes unnecessary reruns that cause dialogs to close abruptly
selected_df = df[df[id_column].isin(selection)]
selected_data = json.loads(selected_df.to_json(orient="records"))
-
+
selected_id, selected_item = None, None
if selected_rows:
selected_id = selected_rows[len(selected_rows) - 1][id_column]
@@ -414,5 +412,5 @@ def on_page_change():
testgen.caption(f"{count} item{'s' if count != 1 else ''} selected")
return selected_data, selected_item
-
+
return None, None
diff --git a/testgen/ui/services/rerun_service.py b/testgen/ui/services/rerun_service.py
new file mode 100644
index 00000000..1f812a46
--- /dev/null
+++ b/testgen/ui/services/rerun_service.py
@@ -0,0 +1,19 @@
+from typing import Literal, NoReturn
+
+import streamlit as st
+
+from testgen.common.models import get_current_session
+
+
+def safe_rerun(*, scope: Literal["app", "fragment"] = "app") -> NoReturn:
+ """Commit any pending database changes, then trigger a Streamlit rerun.
+
+ Prevents data loss when RerunException propagates through the
+ session context manager in app.py:render(). Clears the Streamlit
+ data cache when a database session is active (writes may have occurred).
+ """
+ session = get_current_session()
+ if session:
+ session.commit()
+ st.cache_data.clear()
+ st.rerun(scope=scope)
diff --git a/testgen/ui/session.py b/testgen/ui/session.py
index e1525d37..9f50ed33 100644
--- a/testgen/ui/session.py
+++ b/testgen/ui/session.py
@@ -11,6 +11,7 @@
import streamlit as st
from streamlit.runtime.state import SessionStateProxy
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.utils.singleton import Singleton
T = TypeVar("T")
@@ -59,7 +60,7 @@ def __delattr__(self, key: str) -> None:
def set_sidebar_project(self, project_code: str) -> None:
if project_code != self.sidebar_project:
self.sidebar_project = project_code
- st.rerun()
+ safe_rerun()
def temp_value(session_key: str, *, default: T | None = None) -> tuple[TempValueGetter[T | None], TempValueSetter[T]]:
diff --git a/testgen/ui/static/css/shared.css b/testgen/ui/static/css/shared.css
index 8390aafe..9f6af80f 100644
--- a/testgen/ui/static/css/shared.css
+++ b/testgen/ui/static/css/shared.css
@@ -226,10 +226,26 @@ body {
color: var(--error-color);
}
+.text-warning {
+ color: var(--orange);
+}
+
.text-green {
color: var(--primary-color);
}
+.text-purple {
+ color: var(--purple);
+}
+
+.text-orange {
+ color: var(--orange);
+}
+
+.text-brown {
+ color: var(--brown);
+}
+
.text-capitalize {
text-transform: capitalize;
}
@@ -745,6 +761,38 @@ input::-ms-clear {
margin-top: 0;
}
-.warning-text {
- color: var(--orange);
-}
+/* Base Styles - Using standard system fonts for that Material feel */
+.display, .headline, .title, .body, .label {
+ margin: 0;
+ padding: 0;
+}
+
+/* --- Display: For prominent, large-scale text --- */
+.display { font-weight: 400; letter-spacing: -0.25px; }
+.display.l { font-size: 57px; line-height: 64px; }
+.display.m { font-size: 45px; line-height: 52px; }
+.display.s { font-size: 36px; line-height: 44px; }
+
+/* --- Headline: High-emphasis, shorter text --- */
+.headline { font-weight: 400; }
+.headline.l { font-size: 32px; line-height: 40px; }
+.headline.m { font-size: 28px; line-height: 36px; }
+.headline.s { font-size: 24px; line-height: 32px; }
+
+/* --- Title: Medium-emphasis, usually for UI headers --- */
+.title { font-weight: 400; } /* Title Large is 400, M/S are 500 */
+.title.l { font-size: 22px; line-height: 28px; }
+.title.m { font-size: 16px; line-height: 24px; font-weight: 500; letter-spacing: 0.15px; }
+.title.s { font-size: 14px; line-height: 20px; font-weight: 500; letter-spacing: 0.1px; }
+
+/* --- Body: For extended reading and long-form content --- */
+.body { font-weight: 400; }
+.body.l { font-size: 16px; line-height: 24px; letter-spacing: 0.5px; }
+.body.m { font-size: 14px; line-height: 20px; letter-spacing: 0.25px; }
+.body.s { font-size: 12px; line-height: 16px; letter-spacing: 0.4px; }
+
+/* --- Label: For small functional text (buttons, captions) --- */
+.label { font-weight: 500; }
+.label.l { font-size: 14px; line-height: 20px; letter-spacing: 0.1px; }
+.label.m { font-size: 12px; line-height: 16px; letter-spacing: 0.5px; }
+.label.s { font-size: 11px; line-height: 16px; letter-spacing: 0.5px; }
diff --git a/testgen/ui/static/css/style.css b/testgen/ui/static/css/style.css
index 2637dbd5..05f5768c 100644
--- a/testgen/ui/static/css/style.css
+++ b/testgen/ui/static/css/style.css
@@ -113,6 +113,10 @@ section.stSidebar > [data-testid="stSidebarContent"] {
[data-testid="stSidebarUserContent"] {
display: none;
}
+
+.stAppViewContainer:has(.tg-no-project) > .stSidebar {
+ display: none;
+}
/* */
/* Main content */
diff --git a/testgen/ui/static/js/components/attribute.js b/testgen/ui/static/js/components/attribute.js
index 61240f7f..a7bb60eb 100644
--- a/testgen/ui/static/js/components/attribute.js
+++ b/testgen/ui/static/js/components/attribute.js
@@ -8,11 +8,12 @@
* @property {string?} class
*/
import { getValue, loadStylesheet } from '../utils.js';
+import { PII_REDACTED } from '../display_utils.js';
import { Icon } from './icon.js';
import { withTooltip } from './tooltip.js';
import van from '../van.min.js';
-const { div } = van.tags;
+const { div, code } = van.tags;
const Attribute = (/** @type Properties */ props) => {
loadStylesheet('attribute', stylesheet);
@@ -33,6 +34,12 @@ const Attribute = (/** @type Properties */ props) => {
{ class: 'attribute-value' },
() => {
const value = getValue(props.value);
+ if (value === PII_REDACTED) {
+ return withTooltip(
+ code({ class: 'attribute-pii-redacted' }, 'PII Redacted'),
+ { text: 'You do not have permission to view PII data', position: 'top-right' },
+ );
+ }
return (value || value === 0) ? value : '--';
},
),
@@ -44,6 +51,16 @@ stylesheet.replace(`
.attribute-value {
word-wrap: break-word;
}
+
+.attribute-pii-redacted {
+ display: inline-block;
+ font-size: 12px;
+ padding: 2px 6px;
+ border-radius: 4px;
+ background: color-mix(in srgb, var(--disabled-text-color) 15%, transparent);
+ color: var(--disabled-text-color);
+ overflow: visible;
+}
`);
export { Attribute };
diff --git a/testgen/ui/static/js/components/button.js b/testgen/ui/static/js/components/button.js
index c78f2173..487aa1a0 100644
--- a/testgen/ui/static/js/components/button.js
+++ b/testgen/ui/static/js/components/button.js
@@ -18,7 +18,7 @@
import { emitEvent, enforceElementWidth, getValue, loadStylesheet } from '../utils.js';
import van from '../van.min.js';
import { Streamlit } from '../streamlit.js';
-import { Tooltip } from './tooltip.js';
+import { withTooltip } from './tooltip.js';
const { button, i, span } = van.tags;
const BUTTON_TYPE = {
@@ -52,24 +52,16 @@ const Button = (/** @type Properties */ props) => {
}
const onClickHandler = props.onclick || (() => emitEvent('ButtonClicked'));
- const showTooltip = van.state(false);
- return button(
+ const buttonEl = button(
{
id: getValue(props.id) ?? undefined,
class: () => `tg-button tg-${getValue(props.type)}-button tg-${getValue(props.color) ?? 'basic'}-button ${getValue(props.type) !== 'icon' && isIconOnly ? 'tg-icon-button' : ''}`,
style: () => `width: ${isIconOnly ? '' : (width ?? '100%')}; ${getValue(props.style)}`,
onclick: onClickHandler,
disabled: props.disabled,
- onmouseenter: props.tooltip ? (() => showTooltip.val = true) : undefined,
- onmouseleave: props.tooltip ? (() => showTooltip.val = false) : undefined,
'data-testid': getValue(props.testId) ?? '',
},
- () => window.testgen.isPage && getValue(props.tooltip) ? Tooltip({
- text: props.tooltip,
- show: showTooltip,
- position: props.tooltipPosition,
- }) : '',
span({class: 'tg-button-focus-state-indicator'}, ''),
props.icon ? i({
class: 'material-symbols-rounded',
@@ -77,6 +69,10 @@ const Button = (/** @type Properties */ props) => {
}, props.icon) : undefined,
!isIconOnly ? span(props.label) : undefined,
);
+
+ return getValue(props.tooltip)
+ ? withTooltip(buttonEl, { text: props.tooltip, position: props.tooltipPosition })
+ : buttonEl;
};
const stylesheet = new CSSStyleSheet();
diff --git a/testgen/ui/static/js/components/card.js b/testgen/ui/static/js/components/card.js
index b883b9b7..988d77db 100644
--- a/testgen/ui/static/js/components/card.js
+++ b/testgen/ui/static/js/components/card.js
@@ -9,16 +9,33 @@
* @property {string?} class
* @property {string?} testId
*/
-import { loadStylesheet } from '../utils.js';
+import { loadStylesheet, getValue } from '../utils.js';
import van from '../van.min.js';
const { div, h3 } = van.tags;
const Card = (/** @type Properties */ props) => {
loadStylesheet('card', stylesheet);
-
return div(
- { class: `tg-card mb-4 ${props.border ? 'tg-card-border' : ''} ${props.class}`, id: props.id ?? '', 'data-testid': props.testId ?? '' },
+ {
+ id: props.id ?? '',
+ 'data-testid': props.testId ?? '',
+ class: () => {
+ const classes = ['tg-card'];
+ if (getValue(props.border)) {
+ classes.push('tg-card-border');
+ }
+
+ if (!!props.class) {
+ classes.push(...props.class);
+ if (!props.class.includes('mb-') && !props.class.includes('m-')) {
+ classes.push('mb-4');
+ }
+ }
+
+ return classes.join(' ');
+ },
+ },
() =>
props.title || props.actionContent ?
div(
diff --git a/testgen/ui/static/js/components/connection_form.js b/testgen/ui/static/js/components/connection_form.js
index 011e425a..53100d97 100644
--- a/testgen/ui/static/js/components/connection_form.js
+++ b/testgen/ui/static/js/components/connection_form.js
@@ -83,6 +83,8 @@ const defaultPorts = {
postgresql: '5432',
snowflake: '443',
databricks: '443',
+ oracle: '1521',
+ sap_hana: '39015',
};
/**
@@ -234,6 +236,27 @@ const ConnectionForm = (props, saveButton) => {
connection,
dynamicConnectionUrl,
),
+ oracle: () => OracleForm(
+ updatedConnection,
+ getValue(props.flavors).find(f => f.value === connectionFlavor.rawVal),
+ (formValue, isValid) => {
+ updatedConnection.val = {...updatedConnection.val, ...formValue};
+ setFieldValidity('oracle_form', isValid);
+ },
+ connection,
+ dynamicConnectionUrl,
+ { dbNameLabel: 'Service Name' },
+ ),
+ sap_hana: () => OracleForm(
+ updatedConnection,
+ getValue(props.flavors).find(f => f.value === connectionFlavor.rawVal),
+ (formValue, isValid) => {
+ updatedConnection.val = {...updatedConnection.val, ...formValue};
+ setFieldValidity('sap_hana_form', isValid);
+ },
+ connection,
+ dynamicConnectionUrl,
+ ),
bigquery: () => BigqueryForm(
updatedConnection,
getValue(props.flavors).find(f => f.value === connectionFlavor.rawVal),
@@ -380,6 +403,7 @@ const ConnectionForm = (props, saveButton) => {
* @param {(params: Partial, isValid: boolean) => void} onChange
* @param {Connection?} originalConnection
* @param {VanState} dynamicConnectionUrl
+ * @param {{dbNameLabel: string}?} options
* @returns {HTMLElement}
*/
const RedshiftForm = (
@@ -388,6 +412,7 @@ const RedshiftForm = (
onChange,
originalConnection,
dynamicConnectionUrl,
+ options,
) => {
const isValid = van.state(true);
const connectByUrl = van.state(connection.rawVal.connect_by_url ?? false);
@@ -479,7 +504,7 @@ const RedshiftForm = (
),
Input({
name: 'db_name',
- label: 'Database',
+ label: options?.dbNameLabel || 'Database',
value: connectionDatabase,
disabled: connectByUrl,
onChange: (value, state) => {
@@ -552,6 +577,8 @@ const RedshiftSpectrumForm = RedshiftForm;
const PostgresqlForm = RedshiftForm;
+const OracleForm = RedshiftForm;
+
const AzureMSSQLForm = (
connection,
flavor,
@@ -766,10 +793,11 @@ const DatabricksForm = (
) => {
const isValid = van.state(true);
const connectByUrl = van.state(connection.rawVal?.connect_by_url ?? false);
+ const useOAuth = van.state(connection.rawVal?.connect_by_key ?? false);
const connectionHost = van.state(connection.rawVal?.project_host ?? '');
const connectionPort = van.state(connection.rawVal?.project_port || defaultPorts[flavor.flavor]);
const connectionHttpPath = van.state(connection.rawVal?.http_path ?? '');
- const connectionDatabase = van.state(connection.rawVal?.project_db ?? '');
+ const connectionCatalog = van.state(connection.rawVal?.project_db ?? '');
const connectionUsername = van.state(connection.rawVal?.project_user ?? '');
const connectionPassword = van.state(connection.rawVal?.project_pw_encrypted ?? '');
const connectionUrl = van.state(connection.rawVal?.url ?? '');
@@ -780,13 +808,13 @@ const DatabricksForm = (
onChange({
project_host: connectionHost.val,
project_port: connectionPort.val,
- project_db: connectionDatabase.val,
- project_user: connectionUsername.val,
+ project_db: connectionCatalog.val,
+ project_user: useOAuth.val ? connectionUsername.val : 'token',
project_pw_encrypted: connectionPassword.val,
http_path: connectionHttpPath.val,
connect_by_url: connectByUrl.val,
url: connectByUrl.val ? connectionUrl.val : connectionUrl.rawVal,
- connect_by_key: false,
+ connect_by_key: useOAuth.val,
}, isValid.val);
});
@@ -803,7 +831,7 @@ const DatabricksForm = (
{ class: 'flex-column border border-radius-1 p-3 mt-1 fx-gap-1', style: 'position: relative;' },
Caption({content: 'Server', style: 'position: absolute; top: -10px; background: var(--app-background-color); padding: 0px 8px;' }),
- RadioGroup({
+ () => useOAuth.val ? div() : RadioGroup({
label: 'Connect by',
options: [
{
@@ -868,16 +896,17 @@ const DatabricksForm = (
},
validators: [
requiredIf(() => !connectByUrl.val),
- maxLength(50),
+ maxLength(200),
],
}),
Input({
name: 'db_name',
- label: 'Database',
- value: connectionDatabase,
+ label: 'Catalog',
+ value: connectionCatalog,
+ value: connectionCatalog,
disabled: connectByUrl,
onChange: (value, state) => {
- connectionDatabase.val = value;
+ connectionCatalog.val = value;
validityPerField['db_name'] = state.valid;
isValid.val = Object.values(validityPerField).every(v => v);
},
@@ -906,38 +935,84 @@ const DatabricksForm = (
}),
),
),
-
div(
{ class: 'flex-column border border-radius-1 p-3 mt-1 fx-gap-1', style: 'position: relative;' },
Caption({content: 'Authentication', style: 'position: absolute; top: -10px; background: var(--app-background-color); padding: 0px 8px;' }),
- Input({
- name: 'db_user',
- label: 'Username',
- value: connectionUsername,
- onChange: (value, state) => {
- connectionUsername.val = value;
- validityPerField['db_user'] = state.valid;
- isValid.val = Object.values(validityPerField).every(v => v);
- },
- validators: [
- required,
- maxLength(50),
+ RadioGroup({
+ label: 'Authentication method',
+ options: [
+ {label: 'Access Token (PAT)', value: false},
+ {label: 'Service Principal (OAuth)', value: true},
],
- }),
- Input({
- name: 'password',
- label: 'Password',
- value: connectionPassword,
- type: 'password',
- passwordSuggestions: false,
- placeholder: (originalConnection?.connection_id && originalConnection?.project_pw_encrypted) ? secretsPlaceholder : '',
- onChange: (value, state) => {
- connectionPassword.val = value;
- validityPerField['password'] = state.valid;
+ value: useOAuth,
+ onChange: (value) => {
+ useOAuth.val = value;
+ connectionPassword.val = '';
+ delete validityPerField['password'];
+ if (value) {
+ connectByUrl.val = false;
+ delete validityPerField['db_user'];
+ }
isValid.val = Object.values(validityPerField).every(v => v);
},
+ layout: 'inline',
}),
+
+ () => {
+ if (useOAuth.val) {
+ return div(
+ { class: 'flex-column fx-gap-3' },
+ Input({
+ name: 'db_user',
+ label: 'Client ID',
+ value: connectionUsername,
+ onChange: (value, state) => {
+ connectionUsername.val = value;
+ validityPerField['db_user'] = state.valid;
+ isValid.val = Object.values(validityPerField).every(v => v);
+ },
+ validators: [
+ required,
+ maxLength(100),
+ ],
+ }),
+ Input({
+ name: 'password',
+ label: 'Client Secret',
+ value: connectionPassword,
+ type: 'password',
+ passwordSuggestions: false,
+ placeholder: (originalConnection?.connection_id && originalConnection?.project_pw_encrypted) ? secretsPlaceholder : '',
+ onChange: (value, state) => {
+ connectionPassword.val = value;
+ validityPerField['password'] = state.valid;
+ isValid.val = Object.values(validityPerField).every(v => v);
+ },
+ validators: [
+ requiredIf(() => !originalConnection?.connection_id || !originalConnection?.project_pw_encrypted),
+ ],
+ }),
+ );
+ }
+
+ return Input({
+ name: 'password',
+ label: 'Access Token',
+ value: connectionPassword,
+ type: 'password',
+ passwordSuggestions: false,
+ placeholder: (originalConnection?.connection_id && originalConnection?.project_pw_encrypted) ? secretsPlaceholder : '',
+ onChange: (value, state) => {
+ connectionPassword.val = value;
+ validityPerField['password'] = state.valid;
+ isValid.val = Object.values(validityPerField).every(v => v);
+ },
+ validators: [
+ requiredIf(() => !originalConnection?.connection_id || !originalConnection?.project_pw_encrypted),
+ ],
+ });
+ },
),
);
};
diff --git a/testgen/ui/static/js/components/crontab_input.js b/testgen/ui/static/js/components/crontab_input.js
index 5f0fc190..2701209b 100644
--- a/testgen/ui/static/js/components/crontab_input.js
+++ b/testgen/ui/static/js/components/crontab_input.js
@@ -86,7 +86,7 @@ const CrontabInput = (/** @type Options */ props) => {
}),
),
Portal(
- {target: domId.val, targetRelative: true, align: 'right', style: 'width: 500px;', opened},
+ {target: domId.val, align: 'right', style: 'width: 500px;', opened},
() => CrontabEditorPortal(
{
onChange: onEditorChange,
diff --git a/testgen/ui/static/js/components/dialog.js b/testgen/ui/static/js/components/dialog.js
new file mode 100644
index 00000000..0bcdbd1b
--- /dev/null
+++ b/testgen/ui/static/js/components/dialog.js
@@ -0,0 +1,134 @@
+/**
+ * @typedef DialogProps
+ * @type {object}
+ * @property {(string | import('../van.min.js').State)} title - Dialog title
+ * @property {import('../van.min.js').State} open - Reactive open state
+ * @property {Function} onClose - Called when the dialog is closed (backdrop click or X button)
+ * @property {string} [width] - CSS width value, default '30rem'
+ */
+import van from '../van.min.js';
+import { getValue, loadStylesheet } from '../utils.js';
+
+const { button, div, i, span } = van.tags;
+
+/**
+ * A dialog component that mimics Streamlit's dialog visual style.
+ * Opens as a fixed-position overlay covering the full viewport so it
+ * works from within any V2 component container, regardless of depth.
+ *
+ * Usage:
+ * const open = van.state(false);
+ *
+ * Dialog(
+ * { title: 'Confirm', open, onClose: () => open.val = false },
+ * div('Are you sure?'),
+ * Button({ label: 'Confirm', onclick: () => { doThing(); open.val = false; } }),
+ * )
+ *
+ * @param {DialogProps} props
+ * @param {...(Element | string)} children - Content rendered in the dialog body
+ */
+const Dialog = ({ title, open, onClose, width = '30rem' }, ...children) => {
+ loadStylesheet('dialog', stylesheet);
+
+ return div(
+ {
+ class: 'tg-dialog-overlay',
+ style: () => open.val ? '' : 'display: none',
+ onclick: () => onClose(),
+ },
+ div(
+ {
+ class: 'tg-dialog',
+ role: 'dialog',
+ 'aria-modal': 'true',
+ tabindex: '-1',
+ style: () => `width: ${getValue(width)}`,
+ onclick: (e) => e.stopPropagation(),
+ },
+ div(
+ { class: 'tg-dialog-header' },
+ span({ class: 'tg-dialog-title' }, title),
+ ),
+ div({ class: 'tg-dialog-content' }, ...children),
+ button(
+ {
+ class: 'tg-dialog-close',
+ 'aria-label': 'Close',
+ onclick: () => onClose(),
+ },
+ i({ class: 'material-symbols-rounded' }, 'close'),
+ ),
+ ),
+ );
+};
+
+const stylesheet = new CSSStyleSheet();
+stylesheet.replace(`
+.tg-dialog-overlay {
+ position: fixed;
+ inset: 0;
+ z-index: 1000;
+ background: rgba(49, 51, 63, 0.5);
+ display: flex;
+ align-items: center;
+ justify-content: center;
+}
+
+.tg-dialog {
+ position: relative;
+ background: var(--portal-background, white);
+ border-radius: 8px;
+ box-shadow: var(--portal-box-shadow, 0 4px 32px rgba(0, 0, 0, 0.25));
+ max-width: calc(100vw - 2rem);
+ max-height: 80vh;
+ display: flex;
+ flex-direction: column;
+ overflow: hidden;
+}
+
+.tg-dialog-header {
+ padding: 1.5rem 3.5rem 0.75rem 1.5rem;
+ font-size: 1.5rem;
+ font-weight: 600;
+ line-height: 1.5;
+ display: flex;
+ align-items: center;
+ flex-shrink: 0;
+}
+
+.tg-dialog-content {
+ padding: 0.75rem 1.5rem 1.5rem;
+ overflow-y: auto;
+ color: var(--primary-text-color);
+}
+
+.tg-dialog-close {
+ position: absolute;
+ top: 0.75rem;
+ right: 0.75rem;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ width: 3rem;
+ height: 3rem;
+ padding: 0;
+ border: none;
+ border-radius: 4px;
+ background: transparent;
+ cursor: pointer;
+ color: var(--secondary-text-color);
+ transition: background 200ms;
+}
+
+.tg-dialog-close:hover {
+ background: rgba(0, 0, 0, 0.08);
+}
+
+.tg-dialog-close .material-symbols-rounded {
+ font-size: 24px;
+ line-height: 24px;
+}
+`);
+
+export { Dialog };
diff --git a/testgen/ui/static/js/components/help_menu.js b/testgen/ui/static/js/components/help_menu.js
index 3ea341db..45b2da24 100644
--- a/testgen/ui/static/js/components/help_menu.js
+++ b/testgen/ui/static/js/components/help_menu.js
@@ -23,9 +23,9 @@ import { Icon } from './icon.js';
const { a, div, span } = van.tags;
-const baseHelpUrl = 'https://docs.datakitchen.io/articles/dataops-testgen-help/';
-const releaseNotesTopic = 'testgen-release-notes';
-const upgradeTopic = 'upgrade-testgen';
+const baseHelpUrl = 'https://docs.datakitchen.io/testgen/';
+const releaseNotesTopic = 'release-notes/';
+const upgradeTopic = 'administer/upgrade-testgen/';
const slackUrl = 'https://data-observability-slack.datakitchen.io/join';
const trainingUrl = 'https://info.datakitchen.io/data-quality-training-and-certifications';
diff --git a/testgen/ui/static/js/components/input.js b/testgen/ui/static/js/components/input.js
index 130aba5c..1efb0924 100644
--- a/testgen/ui/static/js/components/input.js
+++ b/testgen/ui/static/js/components/input.js
@@ -132,7 +132,7 @@ const Input = (/** @type Properties */ props) => {
props.prefix,
)
: undefined,
- input({
+ () => input({
value,
name: props.name ?? '',
type: inputType,
@@ -173,7 +173,7 @@ const Input = (/** @type Properties */ props) => {
style: `top: ${((getValue(props.height) || defaultHeight) - addonIconSize) / 2}px`,
onclick: () => inputType.val = passwordFieldTypeSwitch[inputType.val],
},
- inputType.val === 'password' ? 'visibility' : 'visibility_off',
+ () => inputType.val === 'password' ? 'visibility' : 'visibility_off',
)
: '',
showClearable
@@ -194,7 +194,7 @@ const Input = (/** @type Properties */ props) => {
? small({ class: 'tg-input--error' }, firstError)
: '',
Portal(
- { target: domId.val, targetRelative: true, opened: autocompleteOpened },
+ { target: domId.val, opened: autocompleteOpened },
() => div(
{ class: 'tg-input--options-wrapper' },
autocompleteOptions.val?.map(option =>
diff --git a/testgen/ui/static/js/components/portal.js b/testgen/ui/static/js/components/portal.js
index 12fa2e70..272a619a 100644
--- a/testgen/ui/static/js/components/portal.js
+++ b/testgen/ui/static/js/components/portal.js
@@ -1,13 +1,14 @@
/**
* Container for any floating elements anchored to another element.
+ * The portal element is appended to document.body so position: absolute
+ * is document-relative, avoiding issues with positioned ancestors.
*
* NOTE: Ensure options is an object and turn individual properties into van.state
* if dynamic updates are needed.
- *
+ *
* @typedef Options
* @type {object}
* @property {string} target
- * @property {boolean?} targetRelative
* @property {boolean} opened
* @property {'left' | 'right'} align
* @property {('top' | 'bottom')?} position
@@ -19,48 +20,148 @@ import { getValue } from '../utils.js';
const { div } = van.tags;
+const STREAMLIT_DIALOG_ZINDEX = 1000060;
+const STREAMLIT_DIALOG_CLASS = 'stDialog';
+
const Portal = (/** @type Options */ options, ...args) => {
- const { target, targetRelative, align = 'left', position = 'bottom' } = getValue(options);
+ const { target, align = 'left', position = 'bottom' } = getValue(options);
const id = `${target}-portal`;
+ let outsideClickHandler = null;
+
+ const close = () => { options.opened.val = false; };
+
+ window.testgen.portals[id] = { domId: id, targetId: target, opened: options.opened, close };
+
+ // Side-effect derive: manages close loop and outside-click handler.
+ // Kept free of van.add / DOM creation to avoid corrupting VanJS dependency tracking.
+ van.derive(() => {
+ const isOpen = getValue(options.opened);
+
+ if (!isOpen) {
+ if (outsideClickHandler) {
+ document.removeEventListener('click', outsideClickHandler, true);
+ outsideClickHandler = null;
+ }
+ return;
+ }
+
+ const anchor = document.getElementById(target);
+ if (!anchor) return;
- window.testgen.portals[id] = { domId: id, targetId: target, opened: options.opened };
+ // Close other open portals — skip parent portals that contain our anchor.
+ const toClose = [];
+ for (const p of Object.values(window.testgen.portals)) {
+ if (p.domId !== id && p.opened?.rawVal) {
+ const otherEl = document.getElementById(p.domId);
+ if (otherEl?.contains(anchor)) continue;
+ toClose.push(p);
+ }
+ }
+ if (toClose.length) {
+ queueMicrotask(() => toClose.forEach(p => { p.opened.val = false; }));
+ }
- return () => {
+ if (!outsideClickHandler) {
+ outsideClickHandler = (event) => {
+ const anchor = document.getElementById(target);
+ const portalEl = document.getElementById(id);
+ if (portalEl?.contains(event.target)) return;
+ if (anchor?.contains(event.target)) return;
+ if (isClickInsideChildPortal(event.target, id, portalEl)) return;
+ close();
+ };
+ document.addEventListener('click', outsideClickHandler, true);
+ }
+ });
+
+ // DOM rendering: a VanJS binding on document.body.
+ // VanJS manages the element lifecycle natively — no manual createElement/remove.
+ van.add(document.body, () => {
if (!getValue(options.opened)) {
return '';
}
const anchor = document.getElementById(target);
+ if (!anchor) return '';
+
+ const fixed = hasFixedAncestor(anchor);
+ const fromDialog = hasStreamlitDialogAncestor(anchor);
+ const parentPortalEl = getParentPortalElement(anchor, id);
+ const zIndex = parentPortalEl
+ ? (parseInt(parentPortalEl.style.zIndex) || 1001) + 1
+ : fromDialog ? (STREAMLIT_DIALOG_ZINDEX + 1) : 1001;
+ const coords = position === 'bottom'
+ ? calculateBottomPosition(anchor, align, fixed)
+ : calculateTopPosition(anchor, align, fixed);
+
return div(
{
id,
class: getValue(options.class) ?? '',
- style: `position: absolute;
- z-index: 99;
- ${position === 'bottom' ? calculateBottomPosition(anchor, align, targetRelative) : calculateTopPosition(anchor, align, targetRelative)}
- ${getValue(options.style)}`,
+ style: `position: ${fixed ? 'fixed' : 'absolute'}; z-index: ${zIndex}; ${coords} ${getValue(options.style) ?? ''}`,
},
...args,
);
- };
+ });
+
+ return '';
};
-function calculateTopPosition(anchor, align, targetRelative) {
- const anchorRect = anchor.getBoundingClientRect();
- const bottom = (targetRelative ? anchorRect.height : anchorRect.top);
- const left = targetRelative ? 0 : anchorRect.left;
- const right = targetRelative ? 0 : (window.innerWidth - anchorRect.right);
+function getParentPortalElement(anchor, selfId) {
+ for (const p of Object.values(window.testgen.portals)) {
+ if (p.domId === selfId) continue;
+ const el = document.getElementById(p.domId);
+ if (el?.contains(anchor)) return el;
+ }
+ return null;
+}
- return `min-width: ${anchorRect.width}px; bottom: ${bottom}px; ${align === 'left' ? `left: ${left}px;` : `right: ${right}px;`}`;
+function isClickInsideChildPortal(target, selfId, selfPortalEl) {
+ for (const p of Object.values(window.testgen.portals)) {
+ if (p.domId === selfId) continue;
+ const childEl = document.getElementById(p.domId);
+ if (childEl?.contains(target)) {
+ const childAnchor = document.getElementById(p.targetId);
+ if (selfPortalEl?.contains(childAnchor)) return true;
+ }
+ }
+ return false;
}
-function calculateBottomPosition(anchor, align, targetRelative) {
- const anchorRect = anchor.getBoundingClientRect();
- const top = (targetRelative ? 0 : anchorRect.top) + anchorRect.height;
- const left = targetRelative ? 0 : anchorRect.left;
- const right = targetRelative ? 0 : (window.innerWidth - anchorRect.right);
+function hasFixedAncestor(el) {
+ let node = el.parentElement;
+ while (node && node !== document.body) {
+ if (getComputedStyle(node).position === 'fixed') return true;
+ node = node.parentElement;
+ }
+ return false;
+}
+
+function hasStreamlitDialogAncestor(el) {
+ let node = el.parentElement;
+ while (node && node !== document.body) {
+ if (node.classList.contains(STREAMLIT_DIALOG_CLASS)) return true;
+ node = node.parentElement;
+ }
+ return false;
+}
+
+function calculateBottomPosition(anchor, align, fixed = false) {
+ const r = anchor.getBoundingClientRect();
+ const top = fixed ? r.bottom : r.bottom + window.scrollY;
+ const left = fixed ? r.left : r.left + window.scrollX;
+ const right = window.innerWidth - r.right;
+ const constrain = fixed ? `max-height: calc(100vh - ${r.bottom}px - 8px); overflow-y: auto;` : '';
+ return `min-width: ${r.width}px; top: ${top}px; ${constrain} ${align === 'left' ? `left: ${left}px;` : `right: ${right}px;`}`;
+}
- return `min-width: ${anchorRect.width}px; top: ${top}px; ${align === 'left' ? `left: ${left}px;` : `right: ${right}px;`}`;
+function calculateTopPosition(anchor, align, fixed = false) {
+ const r = anchor.getBoundingClientRect();
+ const bottom = fixed ? window.innerHeight - r.top : window.innerHeight - r.top + window.scrollY;
+ const left = fixed ? r.left : r.left + window.scrollX;
+ const right = window.innerWidth - r.right;
+ const constrain = fixed ? `max-height: calc(${r.top}px - 8px); overflow-y: auto;` : '';
+ return `min-width: ${r.width}px; bottom: ${bottom}px; ${constrain} ${align === 'left' ? `left: ${left}px;` : `right: ${right}px;`}`;
}
export { Portal };
diff --git a/testgen/ui/static/js/components/radio_group.js b/testgen/ui/static/js/components/radio_group.js
index 4f8b0008..97aef2df 100644
--- a/testgen/ui/static/js/components/radio_group.js
+++ b/testgen/ui/static/js/components/radio_group.js
@@ -8,11 +8,13 @@
* @typedef Properties
* @type {object}
* @property {string} label
+ * @property {string?} help
* @property {Option[]} options
* @property {string | number | boolean | null} value
* @property {function(string | number | boolean | null)?} onChange
* @property {number?} width
* @property {('default' | 'inline' | 'vertical')?} layout
+ * @property {boolean?} disabled
*/
import van from '../van.min.js';
import { getRandomId, getValue, loadStylesheet } from '../utils.js';
@@ -26,12 +28,19 @@ const RadioGroup = (/** @type Properties */ props) => {
const groupName = getRandomId();
const layout = getValue(props.layout) ?? 'default';
+ const disabled = getValue(props.disabled) ?? false;
return div(
- { class: () => `tg-radio-group--wrapper ${layout}`, style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}` },
+ { class: () => `tg-radio-group--wrapper ${layout}${disabled ? ' disabled' : ''}`, style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}` },
div(
- { class: 'text-caption tg-radio-group--label' },
+ { class: 'text-caption tg-radio-group--label flex-row fx-gap-1' },
props.label,
+ () => getValue(props.help)
+ ? withTooltip(
+ Icon({ size: 16, classes: 'text-disabled' }, 'help'),
+ { text: props.help, position: 'top', width: 200 }
+ )
+ : null,
),
() => div(
{ class: 'tg-radio-group' },
@@ -42,6 +51,7 @@ const RadioGroup = (/** @type Properties */ props) => {
name: groupName,
value: option.value,
checked: () => option.value === getValue(props.value),
+ disabled,
onchange: van.derive(() => {
const onChange = props.onChange?.val ?? props.onChange;
return onChange ? () => onChange(option.value) : null;
@@ -149,6 +159,11 @@ stylesheet.replace(`
border-radius: 5px;
}
+.tg-radio-group--wrapper.disabled {
+ opacity: 0.5;
+ pointer-events: none;
+}
+
.tg-radio-group--help {
white-space: pre-wrap;
line-height: 16px;
diff --git a/testgen/ui/static/js/components/score_issues.js b/testgen/ui/static/js/components/score_issues.js
index 659f8020..bcab1146 100644
--- a/testgen/ui/static/js/components/score_issues.js
+++ b/testgen/ui/static/js/components/score_issues.js
@@ -159,7 +159,7 @@ const IssuesTable = (
category === 'column_name'
? span({ class: 'ml-2' })
: ColumnProfilingButton(row.column, row.table, row.table_group_id),
- columns.map((columnName) => TableCell(row, columnName)),
+ columns.map((columnName) => TableCell(row, columnName, score.project_code)),
)),
() => Paginator({
pageIndex,
@@ -192,7 +192,7 @@ const ColumnProfilingButton = (
style: 'color: var(--secondary-text-color);',
tooltip: 'View profiling for column',
tooltipPosition: 'top-right',
- onclick: () => emitEvent('ColumnProflingClicked', { payload: { column_name, table_name, table_group_id } }),
+ onclick: () => emitEvent('ColumnProfilingClicked', { payload: { column_name, table_name, table_group_id } }),
});
};
@@ -253,13 +253,13 @@ const Toolbar = (
* @param {string} column
* @returns {}
*/
-const TableCell = (row, column) => {
+const TableCell = (row, column, projectCode) => {
const componentByColumn = {
column: IssueColumnCell,
type: IssueCell,
status: StatusCell,
detail: DetailCell,
- time: TimeCell,
+ time: (value, row) => TimeCell(value, row, projectCode),
};
if (componentByColumn[column]) {
@@ -306,7 +306,7 @@ const DetailCell = (value, row) => {
);
};
-const TimeCell = (value, row) => {
+const TimeCell = (value, row, projectCode) => {
return div(
{ class: 'flex-column', style: `flex: 0 0 ${ISSUES_COLUMNS_SIZES.time}` },
row.issue_type === 'test'
@@ -321,6 +321,7 @@ const TimeCell = (value, row) => {
table_name: row.table,
column_name: row.column,
selected: row.id,
+ project_code: projectCode,
},
}),
);
diff --git a/testgen/ui/static/js/components/select.js b/testgen/ui/static/js/components/select.js
index 3e3e658c..cea87c88 100644
--- a/testgen/ui/static/js/components/select.js
+++ b/testgen/ui/static/js/components/select.js
@@ -4,6 +4,7 @@
* @property {string} label
* @property {string} value
* @property {string?} icon
+ * @property {string?} caption
*
* @typedef Properties
* @type {object}
@@ -185,7 +186,7 @@ const Select = (/** @type {Properties} */ props) => {
),
Portal(
- {target: domId.val, targetRelative: true, position: props.portalPosition?.val ?? props?.portalPosition, opened},
+ {target: domId.val, position: props.portalPosition?.val ?? props?.portalPosition, opened},
() => div(
{
class: () => `tg-select--options-wrapper mt-1 ${getValue(props.portalClass) ?? ''}`,
@@ -194,17 +195,19 @@ const Select = (/** @type {Properties} */ props) => {
getValue(filteredOptions).map(option =>
div(
{
- class: () => `tg-select--option ${getValue(value) === option.value ? 'selected' : ''}`,
+ class: () => `tg-select--option flex-column fx-justify-center ${getValue(value) === option.value ? 'selected' : ''} ${option.caption ? 'has-caption' : ''}`,
onclick: (/** @type Event */ event) => {
changeSelection(option);
event.stopPropagation();
},
'data-testid': 'select-options-item',
},
- option.icon
- ? Icon({ classes: 'mr-2' }, option.icon)
- : undefined,
- span(option.label),
+ div(
+ {class: 'flex-row fx-gap-2'},
+ option.icon ? Icon({}, option.icon) : '',
+ span(option.label),
+ ),
+ option.caption ? span({class: 'text-small text-secondary'}, option.caption) : '',
)
),
),
@@ -288,7 +291,7 @@ const MultiSelect = (props) => {
),
Portal(
- {target: domId.val, targetRelative: true, position: props.portalPosition?.val ?? props?.portalPosition, opened},
+ {target: domId.val, position: props.portalPosition?.val ?? props?.portalPosition, opened},
() => div(
{
class: () => `tg-select--options-wrapper mt-1 ${getValue(props.portalClass) ?? ''}`,
@@ -408,8 +411,6 @@ stylesheet.replace(`
}
.tg-select--option {
- display: flex;
- align-items: center;
height: 40px;
padding: 0px 16px;
cursor: pointer;
@@ -420,6 +421,11 @@ stylesheet.replace(`
background: var(--select-hover-background);
}
+.tg-select--option.has-caption {
+ height: auto;
+ padding: 2px 16px;
+}
+
.tg-select--option.selected {
background: var(--select-hover-background);
color: var(--primary-color);
diff --git a/testgen/ui/static/js/components/table.js b/testgen/ui/static/js/components/table.js
index c3ae90c1..58185fb2 100644
--- a/testgen/ui/static/js/components/table.js
+++ b/testgen/ui/static/js/components/table.js
@@ -43,6 +43,7 @@
* @property {string?} class
* @property {((row: any, index: number) => string)?} rowClass
* @property {string?} height
+ * @property {string?} maxHeight
* @property {string?} width
* @property {boolean?} highDensity
* @property {boolean?} dynamicWidth
@@ -157,7 +158,7 @@ const Table = (options, rows) => {
return div(
{
class: () => `tg-table flex-column border border-radius-1 ${getValue(options.highDensity) ? 'tg-table-high-density' : ''} ${getValue(options.dynamicWidth) ? 'tg-table-dynamic-width' : ''} ${options.onRowsSelected ? 'tg-table-hoverable' : ''}`,
- style: () => `height: ${getValue(options.height) ? getValue(options.height) + 'px' : defaultHeight};`,
+ style: () => `height: ${getValue(options.height) ? getValue(options.height) : defaultHeight}; ${getValue(options.maxHeight) ? 'max-height: ' + getValue(options.maxHeight) + ';' : ''}`,
},
options.header,
div(
@@ -201,7 +202,7 @@ const Table = (options, rows) => {
{class: 'tg-table-empty-state-body'},
tr(
td(
- {colspan: dataColumns.length},
+ {colspan: dataColumns.val.length},
options.emptyState,
),
),
diff --git a/testgen/ui/static/js/components/table_group_form.js b/testgen/ui/static/js/components/table_group_form.js
index 6b072255..8ba8b414 100644
--- a/testgen/ui/static/js/components/table_group_form.js
+++ b/testgen/ui/static/js/components/table_group_form.js
@@ -14,6 +14,7 @@
* @property {string?} profile_sk_column_mask
* @property {number?} profiling_delay_days
* @property {boolean?} profile_flag_cdes
+ * @property {boolean?} profile_flag_pii
* @property {boolean?} include_in_dashboard
* @property {boolean?} add_scorecard_definition
* @property {boolean?} profile_use_sampling
@@ -41,6 +42,7 @@
* @property {boolean?} showConnectionSelector
* @property {boolean?} disableConnectionSelector
* @property {boolean?} disableSchemaField
+ * @property {boolean?} disablePiiFlag
* @property {(tg: TableGroup, state: FormState) => void} onChange
*/
import van from '../van.min.js';
@@ -81,6 +83,8 @@ const TableGroupForm = (props) => {
const profileSkColumnMask = van.state(tableGroup.profile_sk_column_mask ?? '%_sk');
const profilingDelayDays = van.state(tableGroup.profiling_delay_days ?? 0);
const profileFlagCdes = van.state(tableGroup.profile_flag_cdes ?? true);
+ const profileFlagPii = van.state(tableGroup.profile_flag_pii ?? true);
+ const profileExcludeXde = van.state(tableGroup.profile_exclude_xde ?? true);
const includeInDashboard = van.state(tableGroup.include_in_dashboard ?? true);
const addScorecardDefinition = van.state(tableGroup.add_scorecard_definition ?? true);
const profileUseSampling = van.state(tableGroup.profile_use_sampling ?? false);
@@ -120,6 +124,8 @@ const TableGroupForm = (props) => {
profile_sk_column_mask: profileSkColumnMask.val,
profiling_delay_days: profilingDelayDays.val,
profile_flag_cdes: profileFlagCdes.val,
+ profile_flag_pii: profileFlagPii.val,
+ profile_exclude_xde: profileExcludeXde.val,
include_in_dashboard: includeInDashboard.val,
add_scorecard_definition: addScorecardDefinition.val,
profile_use_sampling: profileUseSampling.val,
@@ -183,9 +189,11 @@ const TableGroupForm = (props) => {
profileSkColumnMask,
),
SettingsForm(
- { editMode: !!tableGroup.id, setValidity: setFieldValidity },
+ { editMode: !!tableGroup.id, disablePiiFlag: getValue(props.disablePiiFlag) ?? false, setValidity: setFieldValidity },
profilingDelayDays,
profileFlagCdes,
+ profileFlagPii,
+ profileExcludeXde,
includeInDashboard,
addScorecardDefinition,
),
@@ -325,6 +333,8 @@ const SettingsForm = (
options,
profilingDelayDays,
profileFlagCdes,
+ profileFlagPii,
+ profileExcludeXde,
includeInDashboard,
addScorecardDefinition,
) => {
@@ -339,6 +349,19 @@ const SettingsForm = (
checked: profileFlagCdes,
onChange: (value) => profileFlagCdes.val = value,
}),
+ Checkbox({
+ name: 'profile_flag_pii',
+ label: 'Detect PII during profiling',
+ checked: profileFlagPii,
+ onChange: (value) => profileFlagPii.val = value,
+ disabled: options.disablePiiFlag,
+ }),
+ Checkbox({
+ name: 'profile_exclude_xde',
+ label: 'Exclude XDE columns from profiling',
+ checked: profileExcludeXde,
+ onChange: (value) => profileExcludeXde.val = value,
+ }),
Checkbox({
name: 'include_in_dashboard',
label: 'Include table group in Project Dashboard',
diff --git a/testgen/ui/static/js/components/table_group_test.js b/testgen/ui/static/js/components/table_group_test.js
index ff987f06..94aa4898 100644
--- a/testgen/ui/static/js/components/table_group_test.js
+++ b/testgen/ui/static/js/components/table_group_test.js
@@ -111,7 +111,7 @@ const TableGroupTest = (preview, options) => {
),
)
: div(
- { class: 'flex-row fx-justify-center', style: 'height: 50px; font-size: 16px;'},
+ { class: 'flex-row fx-justify-center p-3', style: 'min-height: 50px; font-size: 14px;'},
tableGroupPreview.message ?? 'No tables found.'
),
),
diff --git a/testgen/ui/static/js/components/test_definition_form.js b/testgen/ui/static/js/components/test_definition_form.js
index 31812f87..18b173dc 100644
--- a/testgen/ui/static/js/components/test_definition_form.js
+++ b/testgen/ui/static/js/components/test_definition_form.js
@@ -51,6 +51,7 @@
* @property {string} default_parm_columns
* @property {string} default_parm_prompts
* @property {string} default_parm_help
+ * @property {string?} default_parm_required
* @property {string} default_severity
* @property {'column'|'referential'|'table'|'tablegroup'|'custom'} test_scope
* @property {string?} prediction
@@ -69,7 +70,7 @@ import { Select } from './select.js';
import { Textarea } from './textarea.js';
import { RadioGroup } from './radio_group.js';
import { Caption } from './caption.js';
-import { numberBetween } from '../form_validators.js';
+import { numberBetween, required } from '../form_validators.js';
const { div, span } = van.tags;
@@ -97,6 +98,7 @@ const TestDefinitionForm = (/** @type Properties */ props) => {
const paramColumns = (definition.default_parm_columns || '').split(',').map(v => v.trim());
const paramLabels = (definition.default_parm_prompts || '').split(',').map(v => v.trim());
const paramHelp = (definition.default_parm_help || '').split('|').map(v => v.trim());
+ const paramRequired = (definition.default_parm_required || '').split(',').map(v => v.trim().toUpperCase() === 'Y');
const hasThresholds = paramColumns.includes('history_calculation');
const dynamicParamColumns = paramColumns
@@ -105,6 +107,7 @@ const TestDefinitionForm = (/** @type Properties */ props) => {
column,
label: paramLabels[index] || column.replaceAll('_', ' '),
help: paramHelp[index] || null,
+ validators: paramRequired[index] ? [required] : undefined,
}))
.filter(config => !hasThresholds || !thresholdColumns.includes(config.column))
@@ -171,6 +174,7 @@ const TestDefinitionForm = (/** @type Properties */ props) => {
type: 'number',
value: currentValue(),
step: config.step,
+ validators: config.validators,
onChange: (value, state) => {
setFieldValues({ [column]: value || null })
setFieldValidity(column, state.valid);
@@ -188,8 +192,10 @@ const TestDefinitionForm = (/** @type Properties */ props) => {
help: config.help,
value: currentValue(),
height: 100,
- onChange: (value) => {
- setFieldValues({ [column]: value || null })
+ validators: config.validators,
+ onChange: (value, state) => {
+ setFieldValues({ [column]: value || null });
+ setFieldValidity(column, state.valid);
},
}),
);
@@ -202,6 +208,7 @@ const TestDefinitionForm = (/** @type Properties */ props) => {
label: config.label,
help: config.help,
value: currentValue(),
+ validators: config.validators,
onChange: (value, state) => {
setFieldValues({ [column]: value || null })
setFieldValidity(column, state.valid);
@@ -291,6 +298,21 @@ const ThresholdForm = (options, definition) => {
'lower_tolerance': newMode === 'static' ? lowerTolerance.val : newMode === 'prediction' ? definition.lower_tolerance : null,
'upper_tolerance': newMode === 'static' ? upperTolerance.val : newMode === 'prediction' ? definition.upper_tolerance : null,
});
+ if (newMode === 'static') {
+ if (!isFreshnessTrend) {
+ setFieldValidity('lower_tolerance', !!lowerTolerance.val);
+ }
+ setFieldValidity('upper_tolerance', !!upperTolerance.val);
+ setFieldValidity('history_lookback', true);
+ } else if (newMode === 'historical') {
+ setFieldValidity('lower_tolerance', true);
+ setFieldValidity('upper_tolerance', true);
+ setFieldValidity('history_lookback', !!historyLookback.val);
+ } else {
+ setFieldValidity('lower_tolerance', true);
+ setFieldValidity('upper_tolerance', true);
+ setFieldValidity('history_lookback', true);
+ }
},
}),
() => {
@@ -376,8 +398,8 @@ const ThresholdForm = (options, definition) => {
if (mode.val === 'static') {
return div(
- { class: 'flex-row fx-gap-3 fx-flex-wrap mt-2' },
- !isFreshnessTrend
+ { class: 'flex-row fx-gap-3 fx-flex-wrap fx-align-flex-start mt-2' },
+ !isFreshnessTrend
? div(
{ class: 'td-form--field' },
Input({
@@ -385,6 +407,7 @@ const ThresholdForm = (options, definition) => {
label: 'Lower Bound',
type: 'number',
value: lowerTolerance,
+ validators: [required],
onChange: (value, state) => {
lowerTolerance.val = value;
setFieldValues({ lower_tolerance: value });
@@ -400,6 +423,7 @@ const ThresholdForm = (options, definition) => {
label: isFreshnessTrend ? 'Maximum interval since last update (minutes)' : 'Upper Bound',
type: 'number',
value: upperTolerance,
+ validators: [required],
onChange: (value, state) => {
upperTolerance.val = value;
setFieldValues({ upper_tolerance: value });
diff --git a/testgen/ui/static/js/components/textarea.js b/testgen/ui/static/js/components/textarea.js
index 828d8c86..bdfc411a 100644
--- a/testgen/ui/static/js/components/textarea.js
+++ b/testgen/ui/static/js/components/textarea.js
@@ -1,4 +1,11 @@
/**
+ * @import { Validator } from '../form_validators.js';
+ *
+ * @typedef InputState
+ * @type {object}
+ * @property {boolean} valid
+ * @property {string[]} errors
+ *
* @typedef Properties
* @type {object}
* @property {string?} id
@@ -16,13 +23,14 @@
* @property {number?} width
* @property {number?} height
* @property {string?} testId
+ * @property {Validator[]?} validators
*/
import van from '../van.min.js';
-import { debounce, getValue, loadStylesheet, getRandomId } from '../utils.js';
+import { debounce, getValue, loadStylesheet, getRandomId, checkIsRequired } from '../utils.js';
import { Icon } from './icon.js';
import { withTooltip } from './tooltip.js';
-const { div, label, textarea } = van.tags;
+const { div, label, textarea, small, span } = van.tags;
const defaultHeight = 64;
const Textarea = (/** @type Properties */ props) => {
@@ -30,18 +38,31 @@ const Textarea = (/** @type Properties */ props) => {
const domId = van.derive(() => getValue(props.id) ?? getRandomId());
const value = van.derive(() => getValue(props.value) ?? '');
+ const errors = van.derive(() => {
+ const validators = getValue(props.validators) ?? [];
+ return validators.map(v => v(value.val)).filter(error => error);
+ });
+ const firstError = van.derive(() => {
+ return errors.val[0] ?? '';
+ });
+ const isRequired = van.state(false);
+ const isDirty = van.state(false);
const onChange = props.onChange?.val ?? props.onChange;
if (onChange) {
- onChange(value.val);
+ onChange(value.val, { errors: errors.val, valid: errors.val.length <= 0 });
}
van.derive(() => {
const onChange = props.onChange?.val ?? props.onChange;
- if (onChange && value.val !== value.oldVal) {
- onChange(value.val);
+ if (onChange && (value.val !== value.oldVal || errors.val.length !== errors.oldVal.length)) {
+ onChange(value.val, { errors: errors.val, valid: errors.val.length <= 0 });
}
});
+ van.derive(() => {
+ isRequired.val = checkIsRequired(getValue(props.validators) ?? []);
+ });
+
return label(
{
id: domId,
@@ -52,6 +73,9 @@ const Textarea = (/** @type Properties */ props) => {
div(
{ class: 'flex-row fx-gap-1 text-caption' },
props.label,
+ () => isRequired.val
+ ? span({ class: 'text-error' }, '*')
+ : '',
() => getValue(props.help)
? withTooltip(
Icon({ size: 16, classes: 'text-disabled' }, 'help'),
@@ -66,8 +90,15 @@ const Textarea = (/** @type Properties */ props) => {
name: props.name ?? '',
disabled: props.disabled,
placeholder: () => getValue(props.placeholder) ?? '',
- oninput: debounce((/** @type Event */ event) => value.val = event.target.value, 300),
+ oninput: debounce((/** @type Event */ event) => {
+ isDirty.val = true;
+ value.val = event.target.value;
+ }, 300),
}),
+ () =>
+ isDirty.val && firstError.val
+ ? small({ class: 'tg-textarea--error' }, firstError)
+ : '',
);
};
@@ -96,6 +127,11 @@ stylesheet.replace(`
outline: none;
border-color: var(--primary-color);
}
+
+.tg-textarea--error {
+ height: 12px;
+ color: var(--error-color);
+}
`);
export { Textarea };
diff --git a/testgen/ui/static/js/components/toggle.js b/testgen/ui/static/js/components/toggle.js
index 0a635c7c..8d3fdbd4 100644
--- a/testgen/ui/static/js/components/toggle.js
+++ b/testgen/ui/static/js/components/toggle.js
@@ -4,6 +4,7 @@
* @property {string} label
* @property {string?} name
* @property {boolean?} checked
+ * @property {boolean?} disabled
* @property {string?} style
* @property {function(boolean)?} onChange
*/
@@ -15,14 +16,17 @@ const { input, label } = van.tags;
const Toggle = (/** @type Properties */ props) => {
loadStylesheet('toggle', stylesheet);
+ const disabled = props.disabled?.val ?? props.disabled ?? false;
+
return label(
- { class: 'flex-row fx-gap-2 clickable', style: props.style ?? '', 'data-testid': props.name ?? '' },
+ { class: `flex-row fx-gap-2 ${disabled ? '' : 'clickable'}`, style: props.style ?? '', 'data-testid': props.name ?? '' },
input({
type: 'checkbox',
role: 'switch',
class: 'tg-toggle--input clickable',
name: props.name ?? '',
checked: props.checked,
+ disabled,
onchange: van.derive(() => {
const onChange = props.onChange?.val ?? props.onChange;
return onChange ? (/** @type Event */ event) => onChange(event.target.checked) : null;
@@ -84,6 +88,11 @@ stylesheet.replace(`
.tg-toggle--input:checked::after {
left: 14px;
}
+
+.tg-toggle--input:disabled {
+ opacity: 0.5;
+ cursor: not-allowed;
+}
`);
export { Toggle };
diff --git a/testgen/ui/static/js/components/tooltip.js b/testgen/ui/static/js/components/tooltip.js
index e3b23a39..6da8c523 100644
--- a/testgen/ui/static/js/components/tooltip.js
+++ b/testgen/ui/static/js/components/tooltip.js
@@ -18,6 +18,8 @@ import { getValue, loadStylesheet } from '../utils.js';
const { div, span } = van.tags;
const defaultPosition = 'top';
+const STREAMLIT_DIALOG_ZINDEX = 1000060;
+const STREAMLIT_DIALOG_CLASS = 'stDialog';
const Tooltip = (/** @type Properties */ props) => {
loadStylesheet('tooltip', stylesheet);
@@ -32,17 +34,80 @@ const Tooltip = (/** @type Properties */ props) => {
);
};
+const computeTooltipStyle = (rect, position) => {
+ const cx = rect.left + rect.width / 2;
+ const cy = rect.top + rect.height / 2;
+ const gap = 5;
+
+ const variants = {
+ 'top': { left: cx, top: rect.top, transform: `translateX(-50%) translateY(calc(-100% - ${gap}px))` },
+ 'top-left': { left: cx + 20, top: rect.top, transform: `translateX(-100%) translateY(calc(-100% - ${gap}px))` },
+ 'top-right': { left: cx - 20, top: rect.top, transform: `translateY(calc(-100% - ${gap}px))` },
+ 'bottom': { left: cx, top: rect.bottom, transform: `translateX(-50%) translateY(${gap}px)` },
+ 'bottom-left': { left: cx + 20, top: rect.bottom, transform: `translateX(-100%) translateY(${gap}px)` },
+ 'bottom-right': { left: cx - 20, top: rect.bottom, transform: `translateY(${gap}px)` },
+ 'right': { left: rect.right, top: cy, transform: `translateX(${gap}px) translateY(-50%)` },
+ 'left': { left: rect.left, top: cy, transform: `translateX(calc(-100% - ${gap}px)) translateY(-50%)` },
+ };
+
+ const { left, top, transform } = variants[position] || variants['top'];
+ return `position: fixed; left: ${left}px; top: ${top}px; bottom: auto; right: auto; transform: ${transform};`;
+};
+
const withTooltip = (/** @type HTMLElement */ component, /** @type Properties */ tooltipProps) => {
+ loadStylesheet('tooltip', stylesheet);
+
const showTooltip = van.state(false);
- const tooltip = Tooltip({ ...tooltipProps, show: showTooltip });
+ const positionStyle = van.state('');
+ const zIndex = van.state(9999);
+
+ const tooltipEl = span(
+ {
+ class: () => `tg-tooltip portal ${getValue(tooltipProps.position) || defaultPosition} ${showTooltip.val ? '' : 'hidden'}`,
+ style: () => `opacity: ${showTooltip.val ? 1 : 0}; pointer-events: none; z-index: ${zIndex.val ?? 9999}; max-width: ${getValue(tooltipProps.width) || '400'}px; ${positionStyle.val}${getValue(tooltipProps.style) ?? ''}`,
+ },
+ tooltipProps.text,
+ div({ class: 'tg-tooltip--triangle' }),
+ );
- component.onmouseenter = () => showTooltip.val = true;
- component.onmouseleave = () => showTooltip.val = false;
- component.appendChild(tooltip);
+ van.add(document.body, tooltipEl);
+
+ requestAnimationFrame(() => {
+ if (!component.isConnected) return;
+
+ if (hasStreamlitDialogAncestor(component)) {
+ zIndex.val = STREAMLIT_DIALOG_ZINDEX + 1;
+ }
+
+ const observer = new MutationObserver(() => {
+ if (!component.isConnected) {
+ tooltipEl.remove();
+ observer.disconnect();
+ }
+ });
+ observer.observe(document.body, { childList: true, subtree: true });
+ });
+
+ component.addEventListener('mouseenter', () => {
+ positionStyle.val = computeTooltipStyle(component.getBoundingClientRect(), getValue(tooltipProps.position) || defaultPosition);
+ showTooltip.val = true;
+ });
+ component.addEventListener('mouseleave', () => {
+ showTooltip.val = false;
+ });
return component;
};
+function hasStreamlitDialogAncestor(el) {
+ let node = el.parentElement;
+ while (node && node !== document.body) {
+ if (node.classList.contains(STREAMLIT_DIALOG_CLASS)) return true;
+ node = node.parentElement;
+ }
+ return false;
+}
+
const stylesheet = new CSSStyleSheet();
stylesheet.replace(`
.tg-tooltip {
@@ -60,6 +125,15 @@ stylesheet.replace(`
transition: opacity 0.3s;
}
+.tg-tooltip.portal {
+ position: fixed;
+ top: unset;
+ bottom: unset;
+ left: unset;
+ right: unset;
+ transform: unset;
+}
+
.tg-tooltip--triangle {
width: 0;
height: 0;
diff --git a/testgen/ui/static/js/components/tree.js b/testgen/ui/static/js/components/tree.js
index 82acc371..fbf77c9c 100644
--- a/testgen/ui/static/js/components/tree.js
+++ b/testgen/ui/static/js/components/tree.js
@@ -6,8 +6,9 @@
* @property {string?} classes
* @property {string?} icon
* @property {number?} iconSize
- * @property {'red'?} iconColor
+ * @property {string?} iconClass
* @property {string?} iconTooltip
+ * @property {Element?} prefix
* @property {TreeNode[]?} children
* @property {number?} level
* @property {boolean?} expanded
@@ -91,7 +92,7 @@ const Tree = (/** @type Properties */ props, /** @type any? */ searchOptionsCont
},
Toolbar(treeNodes, multiSelect, props, searchOptionsContent, filtersContent),
div(
- { class: 'tg-tree' },
+ { class: () => `tg-tree ${multiSelect.val ? 'multi-select' : ''}` },
() => div(
{
class: 'tg-tree--nodes',
@@ -312,9 +313,10 @@ const TreeNode = (
span({ class: 'mr-1' }),
]
: null,
+ !multiSelect && node.prefix ? node.prefix : null,
() => {
if (node.icon) {
- const icon = Icon({ size: node.iconSize, classes: `tg-tree--row-icon ${node.iconColor}` }, node.icon);
+ const icon = Icon({ size: node.iconSize, classes: `tg-tree--row-icon ${node.iconClass}` }, node.icon);
return node.iconTooltip ? withTooltip(icon, { text: node.iconTooltip, position: 'right' }) : icon;
}
return null;
@@ -519,10 +521,6 @@ stylesheet.replace(`
color: #B0BEC5;
text-align: center;
}
-
-.tg-tree--row-icon.red {
- color: var(--red);
-}
`);
export { Tree };
diff --git a/testgen/ui/static/js/components/wizard_progress_indicator.js b/testgen/ui/static/js/components/wizard_progress_indicator.js
index 88bbb789..80e35703 100644
--- a/testgen/ui/static/js/components/wizard_progress_indicator.js
+++ b/testgen/ui/static/js/components/wizard_progress_indicator.js
@@ -14,14 +14,15 @@
*
* @param {WizardStepMeta[]} steps
* @param {CurrentStep} currentStep
- * @returns
+ * @param {function(string)?} onStepClick
+ * @returns
*/
import van from '../van.min.js';
import { colorMap } from '../display_utils.js';
const { div, i, span } = van.tags;
-const WizardProgressIndicator = (steps, currentStep) => {
+const WizardProgressIndicator = (steps, currentStep, onStepClick) => {
const currentPhysicalIndex = steps.findIndex(s => s.includedSteps.includes(currentStep.name));
const progressWidth = van.state('0px');
@@ -50,8 +51,12 @@ const WizardProgressIndicator = (steps, currentStep) => {
z-index: -4;
`;
- const currentStepIndicator = (title, stepIndex) => div(
- { class: `flex-column fx-align-flex-center fx-gap-1 step-icon-current`, style: 'position: relative;' },
+ const currentStepIndicator = (title, stepIndex, step) => div(
+ {
+ class: `flex-column fx-align-flex-center fx-gap-1 step-icon-current`,
+ style: `position: relative; ${onStepClick ? 'cursor: pointer;' : ''}`,
+ onclick: () => onStepClick?.(step.includedSteps[0]),
+ },
stepIndex === 0
? div({ style: 'position: absolute; width: 50%; height: 50%; left: 0px; background: var(--dk-dialog-background); z-index: -1;' }, '')
: '',
@@ -66,7 +71,10 @@ const WizardProgressIndicator = (steps, currentStep) => {
);
const pendingStepIndicator = (title, stepIndex) => div(
- { class: `flex-column fx-align-flex-center fx-gap-1 ${currentPhysicalIndex === stepIndex ? 'step-icon-current' : 'text-secondary'}`, style: 'position: relative;' },
+ {
+ class: `flex-column fx-align-flex-center fx-gap-1 ${currentPhysicalIndex === stepIndex ? 'step-icon-current' : 'text-secondary'}`,
+ style: 'position: relative; cursor: default;',
+ },
stepIndex === 0
? div({ style: 'position: absolute; width: 50%; height: 50%; left: 0px; background: var(--dk-dialog-background); z-index: -1;' }, '')
: '',
@@ -80,8 +88,12 @@ const WizardProgressIndicator = (steps, currentStep) => {
span({}, title),
);
- const completedStepIndicator = (title, stepIndex) => div(
- { class: `flex-column fx-align-flex-center fx-gap-1 ${currentPhysicalIndex === stepIndex ? 'step-icon-current' : 'text-secondary'}`, style: 'position: relative;' },
+ const completedStepIndicator = (title, stepIndex, step) => div(
+ {
+ class: `flex-column fx-align-flex-center fx-gap-1 ${currentPhysicalIndex === stepIndex ? 'step-icon-current' : 'text-secondary'}`,
+ style: `position: relative; ${onStepClick ? 'cursor: pointer;' : ''}`,
+ onclick: () => onStepClick?.(step.includedSteps[0]),
+ },
stepIndex === 0
? div({ style: 'position: absolute; width: 50%; height: 50%; left: 0px; background: var(--dk-dialog-background); z-index: -1;' }, '')
: '',
@@ -134,9 +146,9 @@ const WizardProgressIndicator = (steps, currentStep) => {
...steps.map((step, physicalIdx) => {
if (step.index < currentStep.index) {
if (step.skipped) return skippedStepIndicator(step.title, physicalIdx);
- return completedStepIndicator(step.title, physicalIdx);
+ return completedStepIndicator(step.title, physicalIdx, step);
} else if (step.includedSteps.includes(currentStep.name)) {
- return currentStepIndicator(step.title, physicalIdx);
+ return currentStepIndicator(step.title, physicalIdx, step);
} else {
return pendingStepIndicator(step.title, physicalIdx);
}
diff --git a/testgen/ui/static/js/display_utils.js b/testgen/ui/static/js/display_utils.js
index c590c9a0..8dc0c9f5 100644
--- a/testgen/ui/static/js/display_utils.js
+++ b/testgen/ui/static/js/display_utils.js
@@ -2,6 +2,9 @@ function formatTimestamp(
/** @type number | string */ timestamp,
/** @type boolean */ showYear,
) {
+ if (timestamp === PII_REDACTED) {
+ return timestamp;
+ }
if (timestamp) {
let date = timestamp;
if (typeof timestamp === 'number') {
@@ -81,6 +84,9 @@ function humanReadableDuration(/** @type string */ duration, /** @type boolean *
}
function formatNumber(/** @type number | string */ number, /** @type number */ decimals = 3) {
+ if (number === PII_REDACTED) {
+ return number;
+ }
if (!['number', 'string'].includes(typeof number) || isNaN(number)) {
return '--';
}
@@ -173,6 +179,7 @@ const colorMap = {
}
const DISABLED_ACTION_TEXT = 'You do not have permissions to perform this action. Contact your administrator.';
+const PII_REDACTED = '[PII Redacted]';
export {
formatTimestamp,
@@ -187,4 +194,5 @@ export {
viewPortUnitsToPixels,
colorMap,
DISABLED_ACTION_TEXT,
+ PII_REDACTED,
};
diff --git a/testgen/ui/static/js/form_validators.js b/testgen/ui/static/js/form_validators.js
index 635b8b6a..58c085bb 100644
--- a/testgen/ui/static/js/form_validators.js
+++ b/testgen/ui/static/js/form_validators.js
@@ -120,6 +120,26 @@ function sizeLimit(limit) {
return validator;
}
+/**
+ * @typedef NotInOptions
+ * @type {object}
+ * @property {string?} errorMessage
+ * @property {((v: any) => any)?} formatter
+ *
+ *
+ * @param {any[]} values
+ * @param {NotInOptions?} options
+ * @returns {Validator}
+ */
+function notIn(values, options) {
+ return (value) => {
+ if (value && values.includes(!!options?.formatter ? options.formatter(value) : value)) {
+ return options?.errorMessage ?? `Value cannot be any of: ${values.join(', ')}.`;
+ }
+ return null;
+ };
+}
+
export {
maxLength,
minLength,
@@ -128,4 +148,5 @@ export {
required,
requiredIf,
sizeLimit,
+ notIn,
};
diff --git a/testgen/ui/static/js/sidebar.js b/testgen/ui/static/js/sidebar.js
index 9382b40f..a5770281 100644
--- a/testgen/ui/static/js/sidebar.js
+++ b/testgen/ui/static/js/sidebar.js
@@ -5,6 +5,7 @@
* @property {(string|null)} icon
* @property {string} label
* @property {(string|null)} page
+ * @property {(string|null)} permission
* @property {(Array.|null)} items
*
* @typedef Version
@@ -33,6 +34,8 @@
* @property {string} logout_path
* @property {Version} version
* @property {string} support_email
+ * @property {boolean} global_context
+ * @property {boolean} is_global_admin
*/
const van = window.top.van;
const { a, button, div, i, img, label, option, select, span } = van.tags;
@@ -55,29 +58,45 @@ const Sidebar = (/** @type {Properties} */ props) => {
{class: 'menu'},
div(
{class: 'fx-flex', style: 'overflow-y: auto;'},
+ // Project dropdown — hidden in global admin context
div(
- { class: 'menu--project' },
+ {
+ class: 'menu--project',
+ style: () => props.global_context?.val ? 'display: none' : '',
+ },
div({ class: 'caption' }, 'Project'),
() => props.projects.val.length > 1
? ProjectSelect(props.projects, currentProject)
: div(currentProject.val?.name ?? '...'),
),
() => {
- const menuItems = props.menu?.val.items || [];
- return div(
- {class: 'content'},
- menuItems.map(item =>
- item.items?.length > 0
- ? MenuSection(item, props.current_page, currentProject.val?.code)
- : MenuItem(item, props.current_page, currentProject.val?.code))
- );
+ const allItems = props.menu?.val.items || [];
+ if (props.global_context?.val) {
+ // Global admin context: only show global_admin permission items, flat
+ const adminItems = allItems.filter(item => !item.items && item.permission === 'global_admin');
+ return div(
+ {class: 'content'},
+ adminItems.map(item => AdminMenuItem(item, props.current_page)),
+ );
+ } else {
+ // Project context: filter out global_admin items (they have no section, appear at root level)
+ const projectItems = allItems.filter(item => item.items || item.permission !== 'global_admin');
+ return div(
+ {class: 'content'},
+ projectItems.map(item =>
+ item.items?.length > 0
+ ? MenuSection(item, props.current_page, currentProject.val?.code)
+ : MenuItem(item, props.current_page, currentProject.val?.code)
+ ),
+ );
+ }
},
),
div(
div(
{ class: 'menu--user' },
span({class: 'menu--username', title: props.username}, props.username),
- span({class: 'menu--role'}, props.role.val?.replace('_', ' ')),
+ span({class: 'menu--role'}, () => props.role.val?.replace('_', ' ')),
),
div(
{ class: 'menu--buttons' },
@@ -100,6 +119,8 @@ const Sidebar = (/** @type {Properties} */ props) => {
) : null,
),
),
+ // Administration CTA — project context only, global admins only, opens in new tab
+ AdminCTA({ style: () => (!props.global_context?.val && props.is_global_admin?.val) ? '' : 'display: none' }),
);
};
@@ -184,6 +205,47 @@ const MenuItem = (
);
};
+// Menu item for global admin context (no project_code in navigation)
+const AdminMenuItem = (
+ /** @type {MenuItem} */ item,
+ /** @type {string} */ currentPage,
+) => {
+ const classes = van.derive(() => {
+ if (isCurrentPage(item.page, currentPage?.val)) {
+ return 'menu--item active';
+ }
+ return 'menu--item';
+ });
+
+ return a(
+ {
+ class: classes,
+ href: `/${item.page}`,
+ onclick: (event) => {
+ event.preventDefault();
+ event.stopPropagation();
+ emitEvent({ path: item.page, params: {} });
+ },
+ },
+ i({class: 'menu--item--icon material-symbols-rounded'}, item.icon),
+ span({class: 'menu--item--label'}, item.label),
+ );
+};
+
+// Single CTA shown in project context for global admins — opens admin area in new tab
+const AdminCTA = ({ style } = {}) => a(
+ {
+ class: 'menu--item menu--admin-cta',
+ href: '/admin-projects',
+ target: '_blank',
+ rel: 'noopener noreferrer',
+ style,
+ },
+ i({class: 'menu--item--icon material-symbols-rounded'}, 'admin_panel_settings'),
+ span({class: 'menu--item--label'}, 'Administration'),
+ i({class: 'menu--admin-cta--icon material-symbols-rounded'}, 'open_in_new'),
+);
+
function emitEvent(/** @type Object */ data) {
if (Sidebar.StreamlitInstance) {
Sidebar.StreamlitInstance.sendData({ ...data, _id: Math.random() }); // Identify the event so its handler is called once
@@ -326,7 +388,6 @@ stylesheet.replace(`
.menu .menu--buttons {
display: flex;
justify-content: space-between;
- margin-bottom: 16px;
}
.menu--buttons a {
@@ -365,6 +426,18 @@ button.tg-button:hover {
button.tg-button > i:has(+ span:not(.tg-tooltip)) {
margin-right: 8px;
}
+
+.menu--admin-cta {
+ margin-top: 4px;
+ border-top: 1px solid var(--disabled-text-color);
+}
+
+.menu--admin-cta--icon {
+ margin-left: auto;
+ font-size: 16px !important;
+ line-height: 16px !important;
+ opacity: 0.6;
+}
/* ... */
`);
diff --git a/testgen/ui/static/js/streamlit.js b/testgen/ui/static/js/streamlit.js
index a30ace8c..2b1c7995 100644
--- a/testgen/ui/static/js/streamlit.js
+++ b/testgen/ui/static/js/streamlit.js
@@ -7,14 +7,22 @@ const Streamlit = {
enableV2(handler) {
this._v2 = true;
this._customSendDataHandler = handler;
+ window.testgen = window.testgen || {};
+ window.testgen.isPage = true;
+ },
+ disableV2(handler) {
+ if (this._customSendDataHandler === handler) {
+ this._v2 = false;
+ this._customSendDataHandler = null;
+ }
},
setFrameHeight(height) {
- if (!this._v2) {
+ if (!this || !this._v2) {
sendMessageToStreamlit('streamlit:setFrameHeight', { height: height });
}
},
sendData(data) {
- if (this._v2) {
+ if (this && this._v2) {
const event = data.event;
const triggerData = Object.fromEntries(Object.entries(data).filter(([k, v]) => k !== 'event'));
this._customSendDataHandler(event, triggerData);
diff --git a/testgen/ui/static/js/utils.js b/testgen/ui/static/js/utils.js
index 5dc5560f..d71d6ece 100644
--- a/testgen/ui/static/js/utils.js
+++ b/testgen/ui/static/js/utils.js
@@ -78,7 +78,7 @@ const stateProto = Object.getPrototypeOf(van.state());
/**
* Get value from van.state
* @template T
- * @param {T} prop
+ * @param {(import('./van.min.js').VanState | T)} prop
* @returns {T}
*/
function getValue(prop) { // van state or static value
diff --git a/testgen/ui/views/connections.py b/testgen/ui/views/connections.py
index 0c69e992..c0089fca 100644
--- a/testgen/ui/views/connections.py
+++ b/testgen/ui/views/connections.py
@@ -18,7 +18,8 @@
import testgen.ui.services.database_service as db
from testgen.commands.run_profiling import run_profiling_in_background
from testgen.common.database.database_service import empty_cache, get_flavor_service
-from testgen.common.models import with_database_session
+from testgen.common.database.flavor.flavor_service import resolve_connection_params
+from testgen.common.models import get_current_session, with_database_session
from testgen.common.models.connection import Connection, ConnectionMinimal
from testgen.common.models.scheduler import RUN_MONITORS_JOB_KEY, RUN_TESTS_JOB_KEY, JobSchedule
from testgen.common.models.table_group import TableGroup
@@ -27,6 +28,7 @@
from testgen.ui.components import widgets as testgen
from testgen.ui.navigation.menu import MenuItem
from testgen.ui.navigation.page import Page
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.session import session, temp_value
from testgen.ui.utils import get_cron_sample_handler
@@ -65,7 +67,7 @@ class ConnectionsPage(Page):
def render(self, project_code: str, **_kwargs) -> None:
testgen.page_header(
PAGE_TITLE,
- "manage-connections",
+ "connect-your-database/manage-connections/",
)
connections = Connection.select_where(Connection.project_code == project_code)
@@ -170,8 +172,8 @@ def on_setup_table_group_clicked(*_args) -> None:
connection_string: str | None = None
flavor_service = get_flavor_service(connection.sql_flavor)
- flavor_service.init({**connection.to_dict(), "project_pw_encrypted": ""})
- connection_string = flavor_service.get_connection_string().replace("%3E", ">").replace("%3C", "<")
+ params = resolve_connection_params({**connection.to_dict(), "project_pw_encrypted": ""})
+ connection_string = flavor_service.get_connection_string(params).replace("%3E", ">").replace("%3C", "<")
if should_save():
success = True
@@ -179,7 +181,7 @@ def on_setup_table_group_clicked(*_args) -> None:
connection.save()
message = "Changes have been saved successfully."
except Exception as error:
- message = "Error creating connection"
+ message = "Something went wrong while creating the connection."
success = False
LOG.exception(message)
@@ -238,8 +240,8 @@ def _format_connection(self, connection: Connection, should_test: bool = False)
def test_connection(self, connection: Connection) -> "ConnectionStatus":
empty_cache()
try:
- sql_query = "select 1;"
- results = db.fetch_from_target_db(connection, sql_query)
+ flavor_service = get_flavor_service(connection.sql_flavor)
+ results = db.fetch_from_target_db(connection, flavor_service.test_query)
connection_successful = len(results) == 1 and results[0][0] == 1
if not connection_successful:
@@ -267,7 +269,7 @@ def test_connection(self, connection: Connection) -> "ConnectionStatus":
details = error.args[0]
return ConnectionStatus(message="Error attempting the connection.", details=details, successful=False)
except Exception as error:
- details = "Try again"
+ details = "Something went wrong while testing the connection."
if connection.connect_by_key and not connection.private_key:
details = "The private key is missing."
LOG.exception("Error testing database connection")
@@ -304,7 +306,7 @@ def on_close_clicked(_params: dict) -> None:
get_close_dialog, set_close_dialog = temp_value(f"connections:{connection_id}:close", default=False)
if (get_close_dialog()):
- st.rerun()
+ safe_rerun()
get_new_table_group, set_new_table_group = temp_value(
f"connections:{connection_id}:table_group",
@@ -439,6 +441,8 @@ def on_close_clicked(_params: dict) -> None:
predict_holiday_codes=monitor_test_suite_data.get("predict_holiday_codes") or None,
)
monitor_test_suite.save()
+ # Commit needed to make test suite visible to run_monitor_generation's separate DB connection
+ get_current_session().commit()
run_monitor_generation(monitor_test_suite.id, ["Volume_Trend", "Schema_Drift"])
JobSchedule(
@@ -466,9 +470,9 @@ def on_close_clicked(_params: dict) -> None:
LOG.exception(message)
else:
LOG.info("Table group %s created", table_group.id)
- st.rerun()
+ safe_rerun()
except Exception as error:
- message = "Error creating table group"
+ message = "Something went wrong while creating the table group."
success = False
LOG.exception(message)
@@ -495,6 +499,9 @@ def on_close_clicked(_params: dict) -> None:
data={
"project_code": project_code,
"table_group": table_group.to_dict(json_safe=True),
+ "permissions": {
+ "can_view_pii": session.auth.user_has_permission("view_pii"),
+ },
"table_group_preview": table_group_preview,
"steps": [
"tableGroup",
@@ -601,12 +608,24 @@ class ConnectionFlavor:
flavor="mssql",
icon=get_asset_data_url("flavors/mssql.svg"),
),
+ ConnectionFlavor(
+ label="Oracle",
+ value="oracle",
+ flavor="oracle",
+ icon=get_asset_data_url("flavors/oracle.svg"),
+ ),
ConnectionFlavor(
label="PostgreSQL",
value="postgresql",
flavor="postgresql",
icon=get_asset_data_url("flavors/postgresql.svg"),
),
+ ConnectionFlavor(
+ label="SAP HANA",
+ value="sap_hana",
+ flavor="sap_hana",
+ icon=get_asset_data_url("flavors/sap_hana.svg"),
+ ),
ConnectionFlavor(
label="Snowflake",
value="snowflake",
diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py
index c06c8b96..d89a4680 100644
--- a/testgen/ui/views/data_catalog.py
+++ b/testgen/ui/views/data_catalog.py
@@ -12,6 +12,7 @@
from testgen.common.models import with_database_session
from testgen.common.models.project import Project
from testgen.common.models.table_group import TableGroup, TableGroupMinimal
+from testgen.common.pii_masking import PII_REDACTED, get_pii_columns, mask_hygiene_detail, mask_profiling_pii
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets import testgen_component
from testgen.ui.components.widgets.download_dialog import (
@@ -34,9 +35,11 @@
get_tables_by_table_group,
)
from testgen.ui.services.database_service import execute_db_query, fetch_all_from_db
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.session import session, temp_value
from testgen.ui.views.dialogs.column_history_dialog import column_history_dialog
from testgen.ui.views.dialogs.data_preview_dialog import data_preview_dialog
+from testgen.ui.views.dialogs.import_metadata_dialog import open_import_metadata_dialog
from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog
from testgen.ui.views.dialogs.table_create_script_dialog import table_create_script_dialog
from testgen.utils import friendly_score, is_uuid4, make_json_safe, score
@@ -54,13 +57,15 @@ class DataCatalogPage(Page):
]
menu_item = MenuItem(icon=PAGE_ICON, label=PAGE_TITLE, section="Data Profiling", order=0)
- def render(self, project_code: str, table_group_id: str | None = None, selected: str | None = None, **_kwargs) -> None:
+ def render(
+ self, project_code: str, table_group_id: str | None = None, selected: str | None = None, **_kwargs
+ ) -> None:
testgen.page_header(
PAGE_TITLE,
- "data-catalog",
+ "data-catalog/",
)
- _, loading_column = st.columns([.4, .6])
+ _, loading_column = st.columns([0.4, 0.6])
spinner_container = loading_column.container(key="data_catalog:spinner")
with spinner_container:
@@ -74,7 +79,7 @@ def render(self, project_code: str, table_group_id: str | None = None, selected:
user_can_navigate = session.auth.user_has_permission("view")
table_groups = TableGroup.select_minimal_where(TableGroup.project_code == project_code)
- if not table_group_id or table_group_id not in [ str(item.id) for item in table_groups ]:
+ if not table_group_id or table_group_id not in [str(item.id) for item in table_groups]:
table_group_id = str(table_groups[0].id) if table_groups else None
on_table_group_selected(table_group_id)
@@ -89,7 +94,7 @@ def render(self, project_code: str, table_group_id: str | None = None, selected:
selected_item["connection_id"] = str(selected_table_group.connection_id)
else:
on_item_selected(None)
-
+
testgen_component(
"data_catalog",
props={
@@ -99,7 +104,8 @@ def render(self, project_code: str, table_group_id: str | None = None, selected:
"value": str(table_group.id),
"label": table_group.table_groups_name,
"selected": table_group_id == str(table_group.id),
- } for table_group in table_groups
+ }
+ for table_group in table_groups
],
"columns": json.dumps(make_json_safe(columns)) if columns else None,
"selected_item": json.dumps(make_json_safe(selected_item)) if selected_item else None,
@@ -108,13 +114,20 @@ def render(self, project_code: str, table_group_id: str | None = None, selected:
"permissions": {
"can_edit": session.auth.user_has_permission("disposition"),
"can_navigate": user_can_navigate,
+ "can_view_pii": session.auth.user_has_permission("view_pii"),
},
+ "autoflag_settings": {
+ "profile_flag_cdes": selected_table_group.profile_flag_cdes,
+ "profile_flag_pii": selected_table_group.profile_flag_pii,
+ } if selected_table_group else None,
},
on_change_handlers={
"RunProfilingClicked": lambda _: run_profiling_dialog(
project_code=project_code,
table_group_id=selected_table_group.id,
- ) if selected_table_group else None,
+ )
+ if selected_table_group
+ else None,
"TableGroupSelected": on_table_group_selected,
"ItemSelected": on_item_selected,
"ExportClicked": lambda items: download_dialog(
@@ -140,32 +153,41 @@ def render(self, project_code: str, table_group_id: str | None = None, selected:
item["column_name"],
item["add_date"],
),
+ "ImportClicked": lambda _: open_import_metadata_dialog(str(selected_table_group.id))
+ if selected_table_group
+ else None,
+ "ExportCsvClicked": lambda _: export_metadata_csv(selected_table_group)
+ if selected_table_group
+ else None,
},
- event_handlers={ "TagsChanged": partial(on_tags_changed, spinner_container) },
+ event_handlers={"TagsChanged": partial(on_tags_changed, spinner_container, table_group_id)},
)
def on_table_group_selected(table_group_id: str | None) -> None:
- Router().set_query_params({ "table_group_id": table_group_id })
+ Router().set_query_params({"table_group_id": table_group_id})
def on_item_selected(item_id: str | None) -> None:
- Router().set_query_params({ "selected": item_id })
+ Router().set_query_params({"selected": item_id})
class ExportItem(typing.TypedDict):
id: str
type: typing.Literal["table", "column"]
-def get_excel_report_data(update_progress: PROGRESS_UPDATE_TYPE, table_group: TableGroupMinimal, items: list[ExportItem] | None) -> None:
+
+def get_excel_report_data(
+ update_progress: PROGRESS_UPDATE_TYPE, table_group: TableGroupMinimal, items: list[ExportItem] | None
+) -> None:
if items:
table_data = get_tables_by_id(
- table_ids=[ item["id"] for item in items if item["type"] == "table" ],
+ table_ids=[item["id"] for item in items if item["type"] == "table"],
include_tags=True,
include_active_tests=True,
)
column_data = get_columns_by_id(
- column_ids=[ item["id"] for item in items if item["type"] == "column" ],
+ column_ids=[item["id"] for item in items if item["type"] == "column"],
include_tags=True,
include_active_tests=True,
)
@@ -180,10 +202,18 @@ def get_excel_report_data(update_progress: PROGRESS_UPDATE_TYPE, table_group: Ta
include_tags=True,
include_active_tests=True,
)
-
data = pd.DataFrame(table_data + column_data)
- data = data.sort_values(by=["table_name", "ordinal_position"], na_position="first", key=lambda x: x.str.lower() if x.dtype == "object" else x)
+
+ if not session.auth.user_has_permission("view_pii"):
+ pii_columns = get_pii_columns(str(table_group.id))
+ mask_profiling_pii(data, pii_columns)
+
+ data = data.sort_values(
+ by=["table_name", "ordinal_position"],
+ na_position="first",
+ key=lambda x: x.str.lower() if x.dtype == "object" else x,
+ )
for key in ["datatype_suggestion"]:
data[key] = data[key].apply(lambda val: val.lower() if not pd.isna(val) else None)
@@ -192,11 +222,18 @@ def get_excel_report_data(update_progress: PROGRESS_UPDATE_TYPE, table_group: Ta
data[key] = data[key].apply(lambda val: round(val, 2) if not pd.isna(val) else None)
for key in ["min_date", "max_date", "add_date", "last_mod_date", "drop_date"]:
- data[key] = data[key].apply(
- lambda val: val.strftime("%b %-d %Y, %-I:%M %p") if not pd.isna(val) else None
- )
-
- for key in ["data_source", "source_system", "source_process", "business_domain", "stakeholder_group", "transform_level", "aggregation_level", "data_product"]:
+ data[key] = data[key].apply(lambda val: val.strftime("%b %-d %Y, %-I:%M %p") if not pd.isna(val) and not isinstance(val, str) else val)
+
+ for key in [
+ "data_source",
+ "source_system",
+ "source_process",
+ "business_domain",
+ "stakeholder_group",
+ "transform_level",
+ "aggregation_level",
+ "data_product",
+ ]:
data[key] = data.apply(
lambda row: row[key] or row[f"table_{key}"] or row.get(f"table_group_{key}"),
axis=1,
@@ -206,25 +243,31 @@ def get_excel_report_data(update_progress: PROGRESS_UPDATE_TYPE, table_group: Ta
data["general_type"] = data["general_type"].apply(lambda val: type_map.get(val))
data["critical_data_element"] = data.apply(
- lambda row: "Yes" if row["critical_data_element"] == True or row["table_critical_data_element"] == True else None,
+ lambda row: "Yes"
+ if row["critical_data_element"] == True or row["table_critical_data_element"] == True
+ else None,
axis=1,
)
+ data["excluded_data_element"] = data["excluded_data_element"].apply(lambda val: "Yes" if val else None)
+ data["pii_flag"] = data["pii_flag"].apply(lambda val: "Yes" if val else None)
data["top_freq_values"] = data["top_freq_values"].apply(
- lambda val: "\n".join([ f"{part.split(" | ")[1]} | {part.split(" | ")[0]}" for part in val[2:].split("\n| ") ])
- if not pd.isna(val)
- else None
+ lambda val: "\n".join([f"{part.split(" | ")[1]} | {part.split(" | ")[0]}" for part in val[2:].split("\n| ")])
+ if not pd.isna(val) and val != PII_REDACTED
+ else val
)
data["top_patterns"] = data["top_patterns"].apply(
- lambda val: "".join([ f"{part}{'\n' if index % 2 else ' | '}" for index, part in enumerate(val.split(" | ")) ])
- if not pd.isna(val)
- else None
+ lambda val: "".join([f"{part}{'\n' if index % 2 else ' | '}" for index, part in enumerate(val.split(" | "))])
+ if not pd.isna(val) and val != PII_REDACTED
+ else val
)
file_columns = {
"schema_name": {"header": "Schema"},
"table_name": {"header": "Table"},
"column_name": {"header": "Column"},
- "critical_data_element": {},
+ "critical_data_element": {"header": "Critical data element (CDE)"},
+ "pii_flag": {"header": "PII"},
+ "excluded_data_element": {"header": "Excluded data element (XDE)"},
"active_test_count": {"header": "Active tests"},
"ordinal_position": {"header": "Position"},
"general_type": {},
@@ -304,7 +347,7 @@ def remove_table_dialog(item: dict) -> None:
st.html(f"Are you sure you want to remove the table {item['table_name']} from the data catalog?")
st.warning("This action cannot be undone.")
- _, button_column = st.columns([.85, .15])
+ _, button_column = st.columns([0.85, 0.15])
with button_column:
testgen.button(
label="Remove",
@@ -326,29 +369,27 @@ def remove_table_dialog(item: dict) -> None:
st.success("Table has been removed.")
time.sleep(1)
- for func in [ get_table_group_columns, get_tag_values ]:
- func.clear()
st.session_state["data_catalog:last_saved_timestamp"] = datetime.now().timestamp()
- st.rerun()
+ safe_rerun()
-def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> FILE_DATA_TYPE:
+def on_tags_changed(spinner_container: DeltaGenerator, table_group_id: str, payload: dict) -> FILE_DATA_TYPE:
attributes = ["description"]
attributes.extend(TAG_FIELDS)
tags = payload["tags"]
- set_attributes = [ f"{key} = NULLIF(:{key}, '')" for key in attributes if key in tags ]
- params = { key: tags.get(key) or "" for key in attributes if key in tags }
+ set_attributes = [f"{key} = NULLIF(:{key}, '')" for key in attributes if key in tags]
+ params = {key: tags.get(key) or "" for key in attributes if key in tags}
if "critical_data_element" in tags:
set_attributes.append("critical_data_element = :critical_data_element")
params.update({"critical_data_element": tags.get("critical_data_element")})
- params["table_ids"] = [ item["id"] for item in payload["items"] if item["type"] == "table" ]
- params["column_ids"] = [ item["id"] for item in payload["items"] if item["type"] == "column" ]
+ params["table_ids"] = [item["id"] for item in payload["items"] if item["type"] == "table"]
+ params["column_ids"] = [item["id"] for item in payload["items"] if item["type"] == "column"]
with spinner_container:
with st.spinner("Saving tags"):
- if params["table_ids"]:
+ if params["table_ids"] and set_attributes:
execute_db_query(
f"""
WITH selected as (
@@ -364,6 +405,15 @@ def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> FILE_DA
)
if params["column_ids"]:
+ if "excluded_data_element" in tags:
+ set_attributes.append("excluded_data_element = :excluded_data_element")
+ params.update({"excluded_data_element": tags.get("excluded_data_element")})
+
+ # Prevent user from editing PII flag if they cannot view PII
+ if "pii_flag" in tags and session.auth.user_has_permission("view_pii"):
+ set_attributes.append("pii_flag = :pii_flag")
+ params.update({"pii_flag": tags.get("pii_flag")})
+
execute_db_query(
f"""
WITH selected as (
@@ -378,17 +428,90 @@ def on_tags_changed(spinner_container: DeltaGenerator, payload: dict) -> FILE_DA
params,
)
- for func in [ get_table_group_columns, get_table_by_id, get_column_by_id, get_tag_values ]:
- func.clear()
+ _disable_autoflags(table_group_id, payload.get("disable_flags"))
+
st.session_state["data_catalog:last_saved_timestamp"] = datetime.now().timestamp()
- st.rerun()
+ safe_rerun()
+
+
+def _disable_autoflags(table_group_id: str, disable_flags: list[str] | None) -> None:
+ if not disable_flags or not (table_group := TableGroup.get(table_group_id)):
+ return
+
+ changed = False
+ if "profile_flag_cdes" in disable_flags:
+ table_group.profile_flag_cdes = False
+ changed = True
+ if "profile_flag_pii" in disable_flags:
+ table_group.profile_flag_pii = False
+ changed = True
+
+ if changed:
+ table_group.save()
+
+
+def export_metadata_csv(table_group: TableGroupMinimal) -> None:
+ def _get_csv_data(update_progress: PROGRESS_UPDATE_TYPE) -> FILE_DATA_TYPE:
+ table_data = fetch_all_from_db(
+ f"""
+ SELECT table_name, '' AS column_name,
+ description,
+ critical_data_element,
+ {", ".join(TAG_FIELDS)}
+ FROM data_table_chars
+ WHERE table_groups_id = :table_group_id
+ ORDER BY LOWER(table_name)
+ """,
+ {"table_group_id": str(table_group.id)},
+ )
+
+ column_data = fetch_all_from_db(
+ f"""
+ SELECT c.table_name, c.column_name,
+ c.description,
+ c.critical_data_element,
+ c.excluded_data_element,
+ c.pii_flag,
+ {", ".join([ f"c.{tag}" for tag in TAG_FIELDS ])}
+ FROM data_column_chars c
+ LEFT JOIN data_table_chars t ON (c.table_id = t.table_id)
+ WHERE c.table_groups_id = :table_group_id
+ ORDER BY LOWER(c.table_name), c.ordinal_position
+ """,
+ {"table_group_id": str(table_group.id)},
+ )
+
+ rows = []
+ for row in list(table_data) + list(column_data):
+ csv_row = {
+ "Table": row["table_name"],
+ "Column": row["column_name"],
+ "Description": row["description"] or "",
+ "Critical Data Element": "Yes" if row["critical_data_element"] is True else "No" if row["critical_data_element"] is False else "",
+ "PII": "Yes" if row.get("pii_flag") else "No",
+ "Excluded Data Element": "Yes" if row.get("excluded_data_element") else "No",
+ }
+ for tag in TAG_FIELDS:
+ header = tag.replace("_", " ").title()
+ csv_row[header] = row[tag] or ""
+ rows.append(csv_row)
+
+ df = pd.DataFrame(rows)
+ csv_content = df.to_csv(index=False)
+ update_progress(1.0)
+ return "Data Catalog Metadata.csv", "text/csv", csv_content
+
+ download_dialog(
+ dialog_title="Download Metadata CSV",
+ file_content_func=_get_csv_data,
+ )
@st.cache_data(show_spinner=False)
def get_table_group_columns(table_group_id: str) -> list[dict]:
if not is_uuid4(table_group_id):
return []
-
+
query = f"""
SELECT CONCAT('column_', column_chars.column_id) AS column_id,
CONCAT('table_', table_chars.table_id) AS table_id,
@@ -407,6 +530,8 @@ def get_table_group_columns(table_group_id: str) -> list[dict]:
table_chars.drop_date AS table_drop_date,
column_chars.critical_data_element,
table_chars.critical_data_element AS table_critical_data_element,
+ column_chars.excluded_data_element,
+ column_chars.pii_flag,
{", ".join([ f"column_chars.{tag}" for tag in TAG_FIELDS ])},
{", ".join([ f"table_chars.{tag} AS table_{tag}" for tag in TAG_FIELDS ])}
FROM data_column_chars column_chars
@@ -424,7 +549,7 @@ def get_table_group_columns(table_group_id: str) -> list[dict]:
params = {"table_group_id": table_group_id}
results = fetch_all_from_db(query, params)
- return [ dict(row) for row in results ]
+ return [dict(row) for row in results]
def get_selected_item(selected: str, table_group_id: str) -> dict | None:
@@ -445,8 +570,16 @@ def get_selected_item(selected: str, table_group_id: str) -> dict | None:
item["dq_score_profiling"] = friendly_score(item["dq_score_profiling"])
item["dq_score_testing"] = friendly_score(item["dq_score_testing"])
item["hygiene_issues"] = get_hygiene_issues(item["profile_run_id"], item["table_name"], item.get("column_name"))
- item["test_issues"] = get_latest_test_issues(item["table_group_id"], item["table_name"], item.get("column_name"))
- item["test_suites"] = get_related_test_suites(item["table_group_id"], item["table_name"], item.get("column_name"))
+ item["test_issues"] = get_latest_test_issues(
+ item["table_group_id"], item["table_name"], item.get("column_name")
+ )
+ item["test_suites"] = get_related_test_suites(
+ item["table_group_id"], item["table_name"], item.get("column_name")
+ )
+ if not session.auth.user_has_permission("view_pii"):
+ pii_columns = get_pii_columns(item["table_group_id"], table_name=item["table_name"])
+ mask_profiling_pii(item, pii_columns)
+ mask_hygiene_detail(item.get("hygiene_issues", []), pii_columns)
return item
@@ -491,7 +624,7 @@ def get_latest_test_issues(table_group_id: str, table_name: str, column_name: st
}
results = fetch_all_from_db(query, params)
- return [ dict(row) for row in results ]
+ return [dict(row) for row in results]
@st.cache_data(show_spinner=False)
@@ -518,7 +651,7 @@ def get_related_test_suites(table_group_id: str, table_name: str, column_name: s
}
results = fetch_all_from_db(query, params)
- return [ dict(row) for row in results ]
+ return [dict(row) for row in results]
@st.cache_data(show_spinner=False)
diff --git a/testgen/ui/views/dialogs/column_history_dialog.py b/testgen/ui/views/dialogs/column_history_dialog.py
index 24915163..a82282a1 100644
--- a/testgen/ui/views/dialogs/column_history_dialog.py
+++ b/testgen/ui/views/dialogs/column_history_dialog.py
@@ -3,10 +3,12 @@
from testgen.common.models import with_database_session
from testgen.common.models.profiling_run import ProfilingRun
+from testgen.common.pii_masking import get_pii_columns, mask_profiling_pii
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets import testgen_component
from testgen.ui.queries.profiling_queries import COLUMN_PROFILING_FIELDS
from testgen.ui.services.database_service import fetch_one_from_db
+from testgen.ui.session import session
from testgen.utils import make_json_safe
@@ -37,9 +39,20 @@ def _column_history_dialog(
ProfilingRun.profiling_starttime >= func.to_timestamp(add_date),
)
profiling_runs = [run.to_dict(json_safe=True) for run in profiling_runs]
+
+ if not profiling_runs:
+ st.info("No profiling runs are available for this column. Run profiling first to see column history.")
+ return
+
+ with loading_column:
+ with st.spinner("Loading data ..."):
run_id = st.session_state.get("column_history_dialog:run_id") or profiling_runs[0]["id"]
selected_item = get_run_column(run_id, schema_name, table_name, column_name)
+ if selected_item and not session.auth.user_has_permission("view_pii"):
+ pii_columns = get_pii_columns(table_group_id, table_name=table_name)
+ mask_profiling_pii(selected_item, pii_columns)
+
testgen_component(
"column_profiling_history",
props={
diff --git a/testgen/ui/views/dialogs/data_preview_dialog.py b/testgen/ui/views/dialogs/data_preview_dialog.py
index 8a65b006..ee029644 100644
--- a/testgen/ui/views/dialogs/data_preview_dialog.py
+++ b/testgen/ui/views/dialogs/data_preview_dialog.py
@@ -3,8 +3,10 @@
from testgen.common.database.database_service import get_flavor_service
from testgen.common.models.connection import Connection
+from testgen.common.pii_masking import get_pii_columns, mask_source_data_pii
from testgen.ui.components import widgets as testgen
from testgen.ui.services.database_service import fetch_from_target_db
+from testgen.ui.session import session
from testgen.utils import to_dataframe
@@ -26,6 +28,10 @@ def data_preview_dialog(
with st.spinner("Loading data ..."):
data = get_preview_data(table_group_id, schema_name, table_name, column_name)
+ if not data.empty and not session.auth.user_has_permission("view_pii"):
+ pii_columns = get_pii_columns(table_group_id, schema_name, table_name)
+ mask_source_data_pii(data, pii_columns)
+
if data.empty:
st.warning("The preview data could not be loaded.")
else:
@@ -47,14 +53,15 @@ def get_preview_data(
if connection:
flavor_service = get_flavor_service(connection.sql_flavor)
- use_top = flavor_service.use_top
+ row_limiting = flavor_service.row_limiting_clause
quote = flavor_service.quote_character
query = f"""
SELECT DISTINCT
- {"TOP 100" if use_top else ""}
+ {"TOP 100" if row_limiting == "top" else ""}
{f"{quote}{column_name}{quote}" if column_name else "*"}
FROM {quote}{schema_name}{quote}.{quote}{table_name}{quote}
- {"LIMIT 100" if not use_top else ""}
+ {"LIMIT 100" if row_limiting == "limit" else ""}
+ {"FETCH FIRST 100 ROWS ONLY" if row_limiting == "fetch" else ""}
"""
try:
diff --git a/testgen/ui/views/dialogs/generate_tests_dialog.py b/testgen/ui/views/dialogs/generate_tests_dialog.py
index 0da5e623..ad67ed3b 100644
--- a/testgen/ui/views/dialogs/generate_tests_dialog.py
+++ b/testgen/ui/views/dialogs/generate_tests_dialog.py
@@ -7,6 +7,7 @@
from testgen.common.models.test_suite import TestSuiteMinimal
from testgen.ui.components import widgets as testgen
from testgen.ui.services.database_service import execute_db_query, fetch_all_from_db, fetch_one_from_db
+from testgen.ui.services.rerun_service import safe_rerun
@st.dialog(title="Generate Tests")
@@ -76,8 +77,7 @@ def generate_tests_dialog(test_suite: TestSuiteMinimal) -> None:
status_container.success(f"Test generation completed for test suite **{test_suite_name}**.")
time.sleep(1)
- st.cache_data.clear()
- st.rerun()
+ safe_rerun()
def get_test_suite_refresh_warning(test_suite_id: str) -> tuple[int, int, int]:
diff --git a/testgen/ui/views/dialogs/import_metadata_dialog.py b/testgen/ui/views/dialogs/import_metadata_dialog.py
new file mode 100644
index 00000000..209cd308
--- /dev/null
+++ b/testgen/ui/views/dialogs/import_metadata_dialog.py
@@ -0,0 +1,494 @@
+import base64
+import io
+import logging
+import time
+from datetime import datetime
+
+import pandas as pd
+import streamlit as st
+
+from testgen.common.models import with_database_session
+from testgen.common.models.table_group import TableGroup
+from testgen.ui.components.widgets.testgen_component import testgen_component
+from testgen.ui.queries.profiling_queries import TAG_FIELDS
+from testgen.ui.services.database_service import execute_db_query, fetch_all_from_db
+from testgen.ui.services.rerun_service import safe_rerun
+from testgen.ui.session import session, temp_value
+
+LOG = logging.getLogger("testgen")
+
+HEADER_MAP = {
+ "table": "table_name",
+ "column": "column_name",
+ "description": "description",
+ "critical data element": "critical_data_element",
+ "cde": "critical_data_element",
+ "excluded": "excluded_data_element",
+ "excluded data element": "excluded_data_element",
+ "xde": "excluded_data_element",
+ "pii": "pii_flag",
+ "pii flag": "pii_flag",
+ "data source": "data_source",
+ "source system": "source_system",
+ "source process": "source_process",
+ "business domain": "business_domain",
+ "stakeholder group": "stakeholder_group",
+ "transform level": "transform_level",
+ "aggregation level": "aggregation_level",
+ "data product": "data_product",
+}
+
+METADATA_COLUMNS = ["description", "critical_data_element", "excluded_data_element", "pii_flag", *TAG_FIELDS]
+
+TRUE_VALUES = {"yes", "y", "true", "1"}
+FALSE_VALUES = {"no", "n", "false", "0"}
+
+TAG_MAX_LENGTH = 40
+DESCRIPTION_MAX_LENGTH = 1000
+
+
+def parse_import_csv(content: str, table_group_id: str, blank_behavior: str) -> dict:
+ parsed = _parse_csv(content)
+ if "error" in parsed:
+ return parsed
+
+ return _match_and_validate(parsed["df"], parsed["duplicate_rows"], table_group_id, blank_behavior)
+
+
+def _parse_csv(content: str) -> dict:
+ try:
+ raw_bytes = base64.b64decode(content.split(",")[1])
+ df = pd.read_csv(io.BytesIO(raw_bytes), dtype=str, keep_default_na=False)
+ except Exception as e:
+ LOG.warning("CSV parse error: %s", e)
+ return {"error": f"Could not parse CSV file: {e}"}
+
+ # Normalize headers
+ normalized_columns = {}
+ for col in df.columns:
+ key = col.strip().lower().replace("_", " ")
+ mapped = HEADER_MAP.get(key)
+ if mapped:
+ normalized_columns[col] = mapped
+
+ if "table_name" not in normalized_columns.values():
+ return {"error": "CSV must contain a 'Table' column."}
+
+ df = df.rename(columns=normalized_columns)
+ # Keep only recognized columns
+ recognized = [c for c in df.columns if c in ("table_name", "column_name", *METADATA_COLUMNS)]
+ df = df[recognized]
+
+ if df.empty:
+ return {"error": "CSV file is empty."}
+
+ # Strip whitespace from all string fields
+ for col in df.columns:
+ df[col] = df[col].str.strip()
+
+ # Deduplicate: last occurrence wins, mark earlier duplicates
+ has_column_name = "column_name" in df.columns
+ if not has_column_name:
+ df["column_name"] = ""
+ dedup_cols = ["table_name", "column_name"] if has_column_name else ["table_name"]
+ is_last = ~df.duplicated(subset=dedup_cols, keep="last")
+ duplicate_rows = df[~is_last]
+ df = df[is_last]
+
+ return {"df": df, "duplicate_rows": duplicate_rows}
+
+
+def _match_and_validate(
+ df: pd.DataFrame, duplicate_rows: pd.DataFrame, table_group_id: str, blank_behavior: str
+) -> dict:
+ # Query existing tables and columns in this table group
+ existing_tables = fetch_all_from_db(
+ """
+ SELECT table_id::VARCHAR, table_name
+ FROM data_table_chars
+ WHERE table_groups_id = :table_group_id
+ """,
+ {"table_group_id": table_group_id},
+ )
+ table_lookup = {row["table_name"]: row["table_id"] for row in existing_tables}
+
+ existing_columns = fetch_all_from_db(
+ """
+ SELECT column_id::VARCHAR, table_name, column_name
+ FROM data_column_chars
+ WHERE table_groups_id = :table_group_id
+ """,
+ {"table_group_id": table_group_id},
+ )
+ column_lookup = {(row["table_name"], row["column_name"]): row["column_id"] for row in existing_columns}
+
+ table_rows = []
+ column_rows = []
+ preview_rows = []
+
+ for _, dup_row in duplicate_rows.iterrows():
+ preview_rows.append({
+ "table_name": dup_row["table_name"],
+ "column_name": dup_row.get("column_name", ""),
+ "_status": "unmatched",
+ "_status_detail": "Duplicate row — last occurrence will be used",
+ "_truncated_fields": [],
+ })
+
+ for _, row in df.iterrows():
+ table_name = row["table_name"]
+ column_name = row.get("column_name", "")
+
+ if not table_name:
+ continue
+
+ is_table_row = not column_name
+ preview_row = {"table_name": table_name, "column_name": column_name or ""}
+
+ if is_table_row:
+ table_id = table_lookup.get(table_name)
+ if not table_id:
+ preview_row["_status"] = "unmatched"
+ preview_row["_status_detail"] = "Table not found in catalog"
+ preview_rows.append(preview_row)
+ continue
+
+ fields, bad_cde, bad_xde, bad_pii = _extract_metadata_fields(row, blank_behavior)
+ fields, truncated = _truncate_fields(fields)
+ if fields and not bad_cde and not bad_xde and not bad_pii:
+ table_rows.append({"table_id": table_id, "table_name": table_name, **fields})
+
+ preview_row.update(fields)
+ _set_row_status(preview_row, bad_cde, bad_xde, bad_pii, truncated)
+ preview_rows.append(preview_row)
+ else:
+ column_id = column_lookup.get((table_name, column_name))
+ if not column_id:
+ preview_row["_status"] = "unmatched"
+ preview_row["_status_detail"] = (
+ "Table not found in catalog" if table_name not in table_lookup else "Column not found in catalog"
+ )
+ preview_rows.append(preview_row)
+ continue
+
+ fields, bad_cde, bad_xde, bad_pii = _extract_metadata_fields(row, blank_behavior)
+ fields, truncated = _truncate_fields(fields)
+ if fields and not bad_cde and not bad_xde and not bad_pii:
+ column_rows.append(
+ {"column_id": column_id, "table_name": table_name, "column_name": column_name, **fields}
+ )
+
+ preview_row.update(fields)
+ _set_row_status(preview_row, bad_cde, bad_xde, bad_pii, truncated)
+ preview_rows.append(preview_row)
+
+ # Determine which metadata columns are present in the CSV
+ metadata_columns = [c for c in METADATA_COLUMNS if c in df.columns]
+
+ # Strip PII column if user lacks permission
+ pii_skipped = False
+ if "pii_flag" in metadata_columns and not session.auth.user_has_permission("view_pii"):
+ metadata_columns.remove("pii_flag")
+ pii_skipped = True
+
+ # Count matched vs skipped rows from preview
+ # "ok" and "warning" rows will be imported; "error" and "unmatched" rows are skipped
+ _importable = {"ok", "warning"}
+ matched_tables = sum(1 for r in preview_rows if not r.get("column_name") and r.get("_status") in _importable)
+ matched_columns = sum(1 for r in preview_rows if r.get("column_name") and r.get("_status") in _importable)
+ skipped = sum(1 for r in preview_rows if r.get("_status") not in _importable)
+
+ table_group = TableGroup.get(table_group_id)
+
+ return {
+ "table_rows": table_rows,
+ "column_rows": column_rows,
+ "preview_rows": preview_rows,
+ "metadata_columns": metadata_columns,
+ "blank_behavior": blank_behavior,
+ "matched_tables": matched_tables,
+ "matched_columns": matched_columns,
+ "skipped_count": skipped,
+ "warn_cde": bool("critical_data_element" in metadata_columns and table_group.profile_flag_cdes),
+ "warn_pii": bool("pii_flag" in metadata_columns and table_group.profile_flag_pii),
+ "pii_skipped": pii_skipped,
+ }
+
+
+def _extract_metadata_fields(row: pd.Series, blank_behavior: str) -> tuple[dict, bool, bool, bool]:
+ fields = {}
+ bad_cde = False
+ bad_xde = False
+ bad_pii = False
+ for col in METADATA_COLUMNS:
+ if col not in row.index:
+ continue
+
+ value = row[col]
+
+ if col == "critical_data_element":
+ if value.lower() in TRUE_VALUES:
+ fields[col] = True
+ elif value.lower() in FALSE_VALUES:
+ fields[col] = False
+ elif not value:
+ if blank_behavior == "clear":
+ fields[col] = None
+ # "keep" → skip this field
+ else:
+ # Unrecognized value — skip (don't set field at all)
+ bad_cde = True
+ elif col == "excluded_data_element":
+ if value.lower() in TRUE_VALUES:
+ fields[col] = True
+ elif value.lower() in FALSE_VALUES:
+ fields[col] = False
+ elif not value:
+ if blank_behavior == "clear":
+ fields[col] = False
+ else:
+ bad_xde = True
+ elif col == "pii_flag":
+ if value.lower() in TRUE_VALUES:
+ fields[col] = "MANUAL"
+ elif value.lower() in FALSE_VALUES:
+ fields[col] = None
+ elif not value:
+ if blank_behavior == "clear":
+ fields[col] = None
+ else:
+ bad_pii = True
+ else:
+ if value:
+ fields[col] = value
+ elif blank_behavior == "clear":
+ fields[col] = ""
+ # "keep" with blank value → skip this field
+
+ return fields, bad_cde, bad_xde, bad_pii
+
+
+def _truncate_fields(fields: dict) -> tuple[dict, list[str]]:
+ truncated = []
+ for key, value in fields.items():
+ if not isinstance(value, str):
+ continue
+ max_len = DESCRIPTION_MAX_LENGTH if key == "description" else TAG_MAX_LENGTH
+ if len(value) > max_len:
+ fields[key] = value[:max_len]
+ truncated.append(key)
+ return fields, truncated
+
+
+def _set_row_status(preview_row: dict, bad_cde: bool, bad_xde: bool, bad_pii: bool, truncated: list[str]) -> None:
+ issues = []
+ if bad_cde:
+ issues.append("Unrecognized CDE value (expected Yes/No) — skipped")
+ if bad_xde:
+ issues.append("Unrecognized XDE value (expected Yes/No) — skipped")
+ if bad_pii:
+ issues.append("Unrecognized PII value (expected Yes/No) — skipped")
+ if truncated:
+ issues.append(f"Values truncated: {', '.join(truncated)}")
+
+ if bad_cde or bad_xde or bad_pii:
+ preview_row["_status"] = "error"
+ elif truncated:
+ preview_row["_status"] = "warning"
+ else:
+ preview_row["_status"] = "ok"
+ preview_row["_status_detail"] = "\n".join(issues)
+ preview_row["_truncated_fields"] = truncated
+
+
+def apply_metadata_import(preview: dict, table_group_id: str | None = None) -> dict:
+ table_count = 0
+ column_count = 0
+
+ for row in preview.get("table_rows", []):
+ set_clauses, params = _build_update_params(row, preview["metadata_columns"], is_column=False)
+ if not set_clauses:
+ continue
+ params["table_id"] = row["table_id"]
+ execute_db_query(
+ f"UPDATE data_table_chars SET {', '.join(set_clauses)} WHERE table_id = CAST(:table_id AS UUID)",
+ params,
+ )
+ table_count += 1
+
+ for row in preview.get("column_rows", []):
+ set_clauses, params = _build_update_params(row, preview["metadata_columns"], is_column=True)
+ if not set_clauses:
+ continue
+ params["column_id"] = row["column_id"]
+ execute_db_query(
+ f"UPDATE data_column_chars SET {', '.join(set_clauses)} WHERE column_id = CAST(:column_id AS UUID)",
+ params,
+ )
+ column_count += 1
+
+ if table_group_id:
+ _disable_autoflags(table_group_id, preview.get("metadata_columns", []))
+
+ return {"table_count": table_count, "column_count": column_count}
+
+
+def _disable_autoflags(table_group_id: str, metadata_columns: list[str]) -> None:
+ table_group = TableGroup.get(table_group_id)
+ changed = False
+ if "critical_data_element" in metadata_columns and table_group.profile_flag_cdes:
+ table_group.profile_flag_cdes = False
+ changed = True
+ if "pii_flag" in metadata_columns and table_group.profile_flag_pii:
+ table_group.profile_flag_pii = False
+ changed = True
+ if changed:
+ table_group.save()
+
+
+def _build_update_params(row: dict, metadata_columns: list[str], is_column: bool = False) -> tuple[list[str], dict]:
+ set_clauses = []
+ params = {}
+
+ for col in metadata_columns:
+ if col not in row:
+ continue
+
+ value = row[col]
+ if col == "critical_data_element":
+ set_clauses.append("critical_data_element = :critical_data_element")
+ params["critical_data_element"] = value
+ elif col == "excluded_data_element":
+ if is_column:
+ set_clauses.append("excluded_data_element = :excluded_data_element")
+ params["excluded_data_element"] = value
+ elif col == "pii_flag":
+ # Prevent user from editing PII flag if they cannot view PII
+ if is_column and session.auth.user_has_permission("view_pii"):
+ set_clauses.append("pii_flag = :pii_flag")
+ params["pii_flag"] = value
+ else:
+ set_clauses.append(f"{col} = NULLIF(:{col}, '')")
+ params[col] = value if value is not None else ""
+
+ return set_clauses, params
+
+
+PREVIEW_SESSION_KEY = "import_metadata:preview"
+
+
+def open_import_metadata_dialog(table_group_id: str) -> None:
+ """Clear stale preview state before opening the dialog."""
+ st.session_state.pop(PREVIEW_SESSION_KEY, None)
+ import_metadata_dialog(table_group_id)
+
+
+@st.dialog(title="Import Metadata", width="large")
+@with_database_session
+def import_metadata_dialog(table_group_id: str) -> None:
+ should_import, set_should_import = temp_value("import_metadata:import")
+
+ def on_file_uploaded(payload: dict) -> None:
+ content = payload["content"]
+ blank_behavior = payload["blank_behavior"]
+ preview = parse_import_csv(content, table_group_id, blank_behavior)
+ st.session_state[PREVIEW_SESSION_KEY] = preview
+
+ def on_file_cleared(_payload: dict) -> None:
+ st.session_state.pop(PREVIEW_SESSION_KEY, None)
+
+ # Preview persists in session state (not temp_value) so it survives across reruns
+ preview = st.session_state.get(PREVIEW_SESSION_KEY)
+
+ result = None
+ if should_import() and preview and not preview.get("error"):
+ try:
+ apply_metadata_import(preview, table_group_id)
+
+ # Clear caches
+ from testgen.ui.queries.profiling_queries import get_column_by_id, get_table_by_id
+ from testgen.ui.views.data_catalog import get_table_group_columns, get_tag_values
+
+ for func in [get_table_group_columns, get_table_by_id, get_column_by_id, get_tag_values]:
+ func.clear()
+ st.session_state["data_catalog:last_saved_timestamp"] = datetime.now().timestamp()
+
+ parts = []
+ if tc := preview.get("matched_tables", 0):
+ parts.append(f"{tc} {'table' if tc == 1 else 'tables'}")
+ if cc := preview.get("matched_columns", 0):
+ parts.append(f"{cc} {'column' if cc == 1 else 'columns'}")
+ summary = f"Metadata for {', '.join(parts)} imported." if parts else "No metadata was imported."
+
+ result = {
+ "success": True,
+ "message": summary,
+ }
+ except Exception:
+ LOG.exception("Metadata import failed")
+ result = {
+ "success": False,
+ "message": "Something went wrong while importing the metadata.",
+ }
+
+ st.session_state.pop(PREVIEW_SESSION_KEY, None)
+
+ # Build preview data for JS display
+ preview_props = None
+ if preview:
+ if preview.get("error"):
+ preview_props = {"error": preview["error"]}
+ else:
+ preview_props = _build_preview_props(preview)
+
+ testgen_component(
+ "import_metadata_dialog",
+ props={
+ "preview": preview_props,
+ "result": result,
+ },
+ on_change_handlers={
+ "FileUploaded": on_file_uploaded,
+ "FileCleared": on_file_cleared,
+ "ImportConfirmed": lambda _: set_should_import(True),
+ },
+ )
+
+ if result and result["success"]:
+ time.sleep(2)
+ safe_rerun()
+
+
+def _build_preview_props(preview: dict) -> dict:
+ formatted_rows = []
+ metadata_columns = preview.get("metadata_columns", [])
+
+ for row in preview.get("preview_rows", []):
+ formatted_row = {
+ "table_name": row["table_name"],
+ "column_name": row["column_name"],
+ "_status": row.get("_status", "ok"),
+ "_status_detail": row.get("_status_detail", ""),
+ "_truncated_fields": row.get("_truncated_fields", []),
+ }
+ for col in metadata_columns:
+ if col in row:
+ val = row[col]
+ if col in ["excluded_data_element", "pii_flag"]:
+ formatted_row[col] = "Yes" if val else "No"
+ else:
+ formatted_row[col] = (
+ "Yes" if val is True else "No" if val is False else ("" if val is None else str(val))
+ )
+ formatted_rows.append(formatted_row)
+
+ return {
+ "table_count": preview.get("matched_tables", 0),
+ "column_count": preview.get("matched_columns", 0),
+ "skipped_count": preview.get("skipped_count", 0),
+ "metadata_columns": metadata_columns,
+ "preview_rows": formatted_rows,
+ "warn_cde": preview.get("warn_cde", False),
+ "warn_pii": preview.get("warn_pii", False),
+ "pii_skipped": preview.get("pii_skipped", False),
+ }
diff --git a/testgen/ui/views/dialogs/manage_notifications.py b/testgen/ui/views/dialogs/manage_notifications.py
index c1037d4a..af35f828 100644
--- a/testgen/ui/views/dialogs/manage_notifications.py
+++ b/testgen/ui/views/dialogs/manage_notifications.py
@@ -6,10 +6,11 @@
import streamlit as st
-from testgen.common.models import with_database_session
+from testgen.common.models import database_session, with_database_session
from testgen.common.models.notification_settings import NotificationSettings, NotificationSettingsValidationError
from testgen.common.models.settings import PersistedSetting
from testgen.ui.components import widgets
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.session import session, temp_value
LOG = logging.getLogger("testgen")
@@ -41,7 +42,8 @@ def decorator(method):
@wraps(method)
def wrapper(self, *args, **kwargs):
try:
- with_database_session(method)(self, *args, **kwargs)
+ with database_session():
+ method(self, *args, **kwargs)
except NotificationSettingsValidationError as e:
success = False
message = str(e)
@@ -55,7 +57,7 @@ def wrapper(self, *args, **kwargs):
# The ever-changing "idx" is useful to force refreshing the component
self.set_result({"success": success, "message": message, "idx": next(self._result_idx)})
- st.rerun(scope="fragment")
+ safe_rerun(scope="fragment")
return wrapper
return decorator
@@ -141,7 +143,7 @@ def render(self) -> None:
scope_options_labels = dict(component_props.get("scope_options", []))
ns_json_list = sorted(
self._mark_duplicates(ns_json_list),
- key=lambda item: "0" if not item["scope"] else scope_options_labels.get(item["scope"], "ZZZ"),
+ key=lambda item: "0" if not item.get("scope") else scope_options_labels.get(item["scope"], "ZZZ"),
)
widgets.css_class("m-dialog")
widgets.testgen_component(
diff --git a/testgen/ui/views/dialogs/manage_schedules.py b/testgen/ui/views/dialogs/manage_schedules.py
index 82ff0551..c2e459c6 100644
--- a/testgen/ui/views/dialogs/manage_schedules.py
+++ b/testgen/ui/views/dialogs/manage_schedules.py
@@ -6,9 +6,10 @@
import streamlit as st
from sqlalchemy.exc import IntegrityError
-from testgen.common.models import Session, with_database_session
+from testgen.common.models import database_session, with_database_session
from testgen.common.models.scheduler import JobSchedule
from testgen.ui.components import widgets as testgen
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.session import session, temp_value
from testgen.ui.utils import get_cron_sample_handler
@@ -40,21 +41,22 @@ def open(self, project_code: str) -> None:
self.init()
return st.dialog(title=self.title)(self.render)()
+ @with_database_session
def render(self) -> None:
@with_database_session
def on_delete_sched(item):
JobSchedule.delete(item["id"])
- st.rerun(scope="fragment")
+ safe_rerun(scope="fragment")
@with_database_session
def on_pause_sched(item):
JobSchedule.update_active(item["id"], False)
- st.rerun(scope="fragment")
+ safe_rerun(scope="fragment")
@with_database_session
def on_resume_sched(item):
JobSchedule.update_active(item["id"], True)
- st.rerun(scope="fragment")
+ safe_rerun(scope="fragment")
def on_add_schedule(payload: dict[str, str]):
set_arg_value(payload["arg_value"])
@@ -98,7 +100,8 @@ def on_add_schedule(payload: dict[str, str]):
args=args,
kwargs=kwargs,
)
- with_database_session(sched_model.save)()
+ with database_session():
+ sched_model.save()
else:
success = False
message = "Complete all the fields before adding the schedule"
@@ -113,7 +116,7 @@ def on_add_schedule(payload: dict[str, str]):
message = "Error validating the Cron expression"
results = {"success": success, "message": message}
- with Session() as db_session:
+ with database_session() as db_session:
scheduled_jobs = (
db_session.query(JobSchedule)
.where(JobSchedule.project_code == self.project_code, JobSchedule.key == self.job_key)
diff --git a/testgen/ui/views/dialogs/profiling_results_dialog.py b/testgen/ui/views/dialogs/profiling_results_dialog.py
index 3b824a5b..f8907db3 100644
--- a/testgen/ui/views/dialogs/profiling_results_dialog.py
+++ b/testgen/ui/views/dialogs/profiling_results_dialog.py
@@ -4,7 +4,9 @@
import testgen.ui.queries.profiling_queries as profiling_queries
from testgen.common.models import with_database_session
+from testgen.common.pii_masking import get_pii_columns, mask_profiling_pii
from testgen.ui.components.widgets.testgen_component import testgen_component
+from testgen.ui.session import session
from testgen.utils import make_json_safe
@@ -24,6 +26,10 @@ def profiling_results_dialog(column_name: str, table_name: str, table_groups_id:
column = profiling_queries.get_column_by_name(column_name, table_name, table_groups_id)
if column:
+ if not session.auth.user_has_permission("view_pii"):
+ pii_columns = get_pii_columns(table_groups_id, table_name=table_name)
+ mask_profiling_pii(column, pii_columns)
+
testgen_component(
"column_profiling_results",
props={ "column": json.dumps(make_json_safe(column)) },
diff --git a/testgen/ui/views/dialogs/run_profiling_dialog.py b/testgen/ui/views/dialogs/run_profiling_dialog.py
index 74d6dc02..de77622f 100644
--- a/testgen/ui/views/dialogs/run_profiling_dialog.py
+++ b/testgen/ui/views/dialogs/run_profiling_dialog.py
@@ -4,10 +4,10 @@
import streamlit as st
from testgen.commands.run_profiling import run_profiling_in_background
-from testgen.common.models.profiling_run import ProfilingRun
from testgen.common.models.table_group import TableGroup
from testgen.ui.components import widgets as testgen
from testgen.ui.navigation.router import Router
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.session import session, temp_value
LINK_HREF = "profiling-runs"
@@ -68,5 +68,4 @@ def on_run_profiling_confirmed(table_group: dict) -> None:
if result and result["success"] and not result["show_link"]:
time.sleep(2)
- ProfilingRun.select_summary.clear()
- st.rerun()
+ safe_rerun()
diff --git a/testgen/ui/views/dialogs/run_tests_dialog.py b/testgen/ui/views/dialogs/run_tests_dialog.py
index 1350a230..35798819 100644
--- a/testgen/ui/views/dialogs/run_tests_dialog.py
+++ b/testgen/ui/views/dialogs/run_tests_dialog.py
@@ -6,6 +6,7 @@
from testgen.common.models import with_database_session
from testgen.common.models.test_suite import TestSuite, TestSuiteMinimal
from testgen.ui.components import widgets as testgen
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.session import session
from testgen.utils import to_dataframe
@@ -52,11 +53,13 @@ def run_tests_dialog(project_code: str, test_suite: TestSuiteMinimal | None = No
button_container = st.empty()
status_container = st.empty()
+ link_clicked = st.session_state.get(LINK_KEY)
run_test_button = None
- with button_container:
- _, button_column = st.columns([.8, .2])
- with button_column:
- run_test_button = st.button("Run Tests", use_container_width=True, disabled=not test_suite_id)
+ if not link_clicked:
+ with button_container:
+ _, button_column = st.columns([.8, .2])
+ with button_column:
+ run_test_button = st.button("Run Tests", use_container_width=True, disabled=not test_suite_id)
if run_test_button:
button_container.empty()
@@ -68,7 +71,7 @@ def run_tests_dialog(project_code: str, test_suite: TestSuiteMinimal | None = No
status_container.error(f"Test run encountered errors: {e!s}.")
# The second condition is needed for the link to work
- if run_test_button or st.session_state.get(LINK_KEY):
+ if run_test_button or link_clicked:
with status_container.container():
st.success(
f"Test run started for test suite **{test_suite_name}**."
@@ -87,5 +90,4 @@ def run_tests_dialog(project_code: str, test_suite: TestSuiteMinimal | None = No
)
else:
time.sleep(2)
- st.cache_data.clear()
- st.rerun()
+ safe_rerun()
diff --git a/testgen/ui/views/dialogs/test_definition_notes_dialog.py b/testgen/ui/views/dialogs/test_definition_notes_dialog.py
new file mode 100644
index 00000000..26a269c6
--- /dev/null
+++ b/testgen/ui/views/dialogs/test_definition_notes_dialog.py
@@ -0,0 +1,39 @@
+import streamlit as st
+
+from testgen.common.models import with_database_session
+from testgen.common.models.test_definition import TestDefinitionNote
+from testgen.ui.components import widgets as testgen
+from testgen.ui.queries import test_result_queries
+from testgen.ui.session import session
+
+
+@st.dialog(title="Test Notes", on_dismiss="rerun")
+@with_database_session
+def test_definition_notes_dialog(test_definition_id: str, test_label: dict) -> None:  # Dialog for viewing, adding, editing, and deleting notes on a test definition
+ current_user = session.auth.user.username if session.auth.user else "unknown"  # author recorded on new notes; "unknown" when no user in session
+ notes = TestDefinitionNote.get_notes(test_definition_id)
+
+ def on_note_added(payload: dict) -> None:
+ TestDefinitionNote.add_note(test_definition_id, payload["text"], current_user)
+ test_result_queries.get_test_results.clear()  # invalidate cached test results — presumably they surface note info; confirm
+
+ def on_note_updated(payload: dict) -> None:
+ TestDefinitionNote.update_note(payload["id"], payload["text"])  # NOTE(review): add/delete clear the get_test_results cache but update does not — confirm intended
+
+ def on_note_deleted(payload: dict) -> None:
+ TestDefinitionNote.delete_note(payload["id"])
+ test_result_queries.get_test_results.clear()
+
+ testgen.testgen_component(
+ "test_definition_notes",
+ props={
+ "test_label": test_label,
+ "notes": notes,
+ "current_user": current_user,  # presumably lets the frontend decide which notes this user may edit — confirm
+ },
+ on_change_handlers={
+ "NoteAdded": on_note_added,
+ "NoteUpdated": on_note_updated,
+ "NoteDeleted": on_note_deleted,
+ },
+ )
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index 15e04614..e4ef88c3 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -12,6 +12,7 @@
from testgen.common.models import with_database_session
from testgen.common.models.hygiene_issue import HygieneIssue
from testgen.common.models.profiling_run import ProfilingRun
+from testgen.common.pii_masking import mask_hygiene_detail
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets.download_dialog import (
FILE_DATA_TYPE,
@@ -59,12 +60,19 @@ def render(
)
return
+ if not session.auth.user_has_project_access(run.project_code):
+ self.router.navigate_with_warning(
+ "You don't have access to view this resource. Redirecting ...",
+ "profiling-runs",
+ )
+ return
+
run_date = date_service.get_timezoned_timestamp(st.session_state, run.profiling_starttime)
session.set_sidebar_project(run.project_code)
testgen.page_header(
"Hygiene Issues",
- "data-hygiene-issues",
+ "data-profiling/data-hygiene-issues/",
breadcrumbs=[
{ "label": "Profiling Runs", "path": "profiling-runs", "params": { "project_code": run.project_code } },
{ "label": f"{run.table_groups_name} | {run_date}" },
@@ -177,6 +185,10 @@ def render(
# Get hygiene issue list
df_pa = get_profiling_anomalies(run_id, likelihood, issue_type_id, table_name, column_name, action, sorting_columns)
+ # Mask detail for PII columns with redactable details
+ if not session.auth.user_has_permission("view_pii"):
+ mask_hygiene_detail(df_pa)
+
# Retrieve disposition action (cache refreshed)
df_action = get_anomaly_disposition(run_id)
@@ -302,8 +314,6 @@ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
int_data_width=700,
)
- cached_functions = [get_anomaly_disposition, get_profiling_anomaly_summary, get_profiling_anomalies]
-
disposition_actions = [
{ "icon": "✓", "help": "Confirm this issue as relevant for this run", "status": "Confirmed" },
{ "icon": "✘", "help": "Dismiss this issue as not relevant for this run", "status": "Dismissed" },
@@ -327,8 +337,6 @@ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
fm.reset_post_updates(
do_disposition_update(selected, d_action["status"]),
as_toast=True,
- clear_cache=True,
- lst_cached_functions=cached_functions,
)
# Needs to be after all data loading/updating
@@ -434,6 +442,10 @@ def get_excel_report_data(
if data is None:
data = get_profiling_anomalies(run_id)
+ if not session.auth.user_has_permission("view_pii"):
+ data = data.copy()
+ mask_hygiene_detail(data)
+
columns = {
"table_name": {"header": "Table"},
"column_name": {"header": "Column"},
@@ -464,14 +476,15 @@ def source_data_dialog(selected_row):
st.markdown("#### Hygiene Issue Detail")
st.caption(selected_row["detail"])
+ mask_pii = not session.auth.user_has_permission("view_pii")
with st.spinner("Retrieving source data..."):
- bad_data_status, bad_data_msg, _, df_bad = get_hygiene_issue_source_data(selected_row, limit=500)
+ bad_data_status, bad_data_msg, _, df_bad = get_hygiene_issue_source_data(selected_row, limit=500, mask_pii=mask_pii)
if bad_data_status in {"ND", "NA"}:
st.info(bad_data_msg)
elif bad_data_status == "ERR":
st.error(bad_data_msg)
elif df_bad is None:
- st.error("An unknown error was encountered.")
+ st.error("Something went wrong while loading the data.")
else:
if bad_data_msg:
st.info(bad_data_msg)
@@ -508,7 +521,7 @@ def get_report_file_data(update_progress, tr_data) -> FILE_DATA_TYPE:
file_name = f"testgen_hygiene_issue_report_{hi_id}_{profiling_time}.pdf"
with BytesIO() as buffer:
- create_report(buffer, tr_data)
+ create_report(buffer, tr_data, mask_pii=not session.auth.user_has_permission("view_pii"))
update_progress(1.0)
buffer.seek(0)
return file_name, "application/pdf", buffer.read()
diff --git a/testgen/ui/views/monitors_dashboard.py b/testgen/ui/views/monitors_dashboard.py
index 491789be..e19145a1 100644
--- a/testgen/ui/views/monitors_dashboard.py
+++ b/testgen/ui/views/monitors_dashboard.py
@@ -8,7 +8,7 @@
from testgen.commands.test_generation import run_monitor_generation
from testgen.common.freshness_service import add_business_minutes, get_schedule_params, resolve_holiday_dates
-from testgen.common.models import with_database_session
+from testgen.common.models import get_current_session, with_database_session
from testgen.common.models.notification_settings import (
MonitorNotificationSettings,
MonitorNotificationTrigger,
@@ -25,6 +25,7 @@
from testgen.ui.navigation.router import Router
from testgen.ui.queries.profiling_queries import get_tables_by_table_group
from testgen.ui.services.database_service import execute_db_query, fetch_all_from_db, fetch_one_from_db
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.session import session, temp_value
from testgen.ui.utils import dict_from_kv, get_cron_sample, get_cron_sample_handler
from testgen.ui.views.dialogs.manage_notifications import NotificationSettingsDialogBase
@@ -75,7 +76,7 @@ def render(
) -> None:
testgen.page_header(
PAGE_TITLE,
- "monitor-tables",
+ "monitor-tables/",
)
project_summary = Project.get_summary(project_code)
@@ -557,9 +558,14 @@ def on_save_settings_clicked(payload: dict) -> None:
updated_table_group = TableGroup.get(table_group.id)
updated_table_group.monitor_test_suite_id = monitor_suite.id
updated_table_group.save()
- run_monitor_generation(monitor_suite.id, ["Volume_Trend", "Schema_Drift"])
+ monitors: list[str] = ["Volume_Trend", "Schema_Drift"]
+ if updated_table_group.last_complete_profile_run_id:
+ monitors.append("Freshness_Trend")
+ # Commit needed to make test suite visible to run_monitor_generation's separate DB connection
+ get_current_session().commit()
+ run_monitor_generation(monitor_suite.id, monitors)
- st.rerun()
+ safe_rerun()
testgen.edit_monitor_settings(
key="edit_monitor_settings",
@@ -614,15 +620,14 @@ def on_delete_confirmed(*_args) -> None:
with st.spinner("Deleting monitors ..."):
monitor_suite = TestSuite.get(table_group.monitor_test_suite_id)
TestSuite.cascade_delete([monitor_suite.id])
- st.cache_data.clear()
- st.rerun()
+ safe_rerun()
except Exception:
LOG.exception("Failed to delete monitor suite")
set_result({
"success": False,
- "message": "Unable to delete monitors for the table group, try again.",
+ "message": "Something went wrong while deleting the monitors.",
})
- st.rerun(scope="fragment")
+ safe_rerun(scope="fragment")
def open_schema_changes(table_group: TableGroupMinimal, payload: dict):
@@ -1030,10 +1035,10 @@ def on_save_test_definition(payload: dict) -> None:
)
if should_close():
- st.rerun()
+ safe_rerun()
set_result({"success": True, "timestamp": datetime.now(UTC).isoformat()})
- st.rerun(scope="fragment")
+ safe_rerun(scope="fragment")
metric_test_types = TestType.select_summary_where(TestType.test_type == "Metric_Trend")
metric_test_type = metric_test_types[0] if metric_test_types else None
diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py
index 5a31fa3f..a1529f95 100644
--- a/testgen/ui/views/profiling_results.py
+++ b/testgen/ui/views/profiling_results.py
@@ -10,6 +10,7 @@
from testgen.common import date_service
from testgen.common.models import with_database_session
from testgen.common.models.profiling_run import ProfilingRun
+from testgen.common.pii_masking import PII_REDACTED, get_pii_columns, mask_hygiene_detail, mask_profiling_pii
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets.download_dialog import (
FILE_DATA_TYPE,
@@ -44,12 +45,19 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str |
)
return
+ if not session.auth.user_has_project_access(run.project_code):
+ self.router.navigate_with_warning(
+ "You don't have access to view this resource. Redirecting ...",
+ "profiling-runs",
+ )
+ return
+
run_date = date_service.get_timezoned_timestamp(st.session_state, run.profiling_starttime)
session.set_sidebar_project(run.project_code)
testgen.page_header(
"Data Profiling Results",
- "investigate-profiling-results",
+ "data-profiling/investigate-profiling-results/",
breadcrumbs=[
{ "label": "Profiling Runs", "path": "profiling-runs", "params": { "project_code": run.project_code } },
{ "label": f"{run.table_groups_name} | {run_date}" },
@@ -123,6 +131,10 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str |
sorting_columns=sorting_columns,
)
+ if not session.auth.user_has_permission("view_pii"):
+ pii_columns = get_pii_columns(str(run.table_groups_id))
+ mask_profiling_pii(df, pii_columns)
+
selected, selected_row = fm.render_grid_select(
df,
["table_name", "column_name", "db_data_type", "semantic_data_type", "hygiene_issues", "result_details"],
@@ -161,6 +173,9 @@ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
st.markdown(":orange[Select a row to see profiling details.]")
else:
selected_row["hygiene_issues"] = profiling_queries.get_hygiene_issues(run_id, selected_row["table_name"], selected_row.get("column_name"))
+ if not session.auth.user_has_permission("view_pii"):
+ pii_cols = get_pii_columns(selected_row["table_group_id"], table_name=selected_row["table_name"])
+ mask_hygiene_detail(selected_row["hygiene_issues"], pii_cols)
testgen_component(
"column_profiling_results",
props={ "column": json.dumps(selected_row), "data_preview": True },
@@ -190,6 +205,10 @@ def get_excel_report_data(
data = profiling_queries.get_profiling_results(run_id)
date_service.accommodate_dataframe_to_timezone(data, st.session_state)
+ if not session.auth.user_has_permission("view_pii"):
+ pii_columns = get_pii_columns(data["table_group_id"].iloc[0] if "table_group_id" in data.columns and not data.empty else "")  # guard .iloc[0]: empty frame would raise IndexError even when the column exists
+ mask_profiling_pii(data, pii_columns)
+
for key in ["datatype_suggestion"]:
data[key] = data[key].apply(lambda val: val.lower() if not pd.isna(val) else None)
@@ -198,7 +217,7 @@ def get_excel_report_data(
for key in ["min_date", "max_date"]:
data[key] = data[key].apply(
- lambda val: parse_fuzzy_date(val) if not pd.isna(val) and val != "NaT" else None
+ lambda val: parse_fuzzy_date(val) if not pd.isna(val) and val != "NaT" and val != PII_REDACTED else val
)
data["hygiene_issues"] = data["hygiene_issues"].apply(lambda val: "Yes" if val else None)
@@ -208,13 +227,13 @@ def get_excel_report_data(
data["top_freq_values"] = data["top_freq_values"].apply(
lambda val: "\n".join([ f"{part.split(" | ")[1]} | {part.split(" | ")[0]}" for part in val[2:].split("\n| ") ])
- if val
- else None
+ if val and val != PII_REDACTED
+ else val
)
data["top_patterns"] = data["top_patterns"].apply(
lambda val: "".join([ f"{part}{'\n' if index % 2 else ' | '}" for index, part in enumerate(val.split(" | ")) ])
- if val
- else None
+ if val and val != PII_REDACTED
+ else val
)
columns = {
diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py
index 40ee4487..b59363d1 100644
--- a/testgen/ui/views/profiling_runs.py
+++ b/testgen/ui/views/profiling_runs.py
@@ -22,6 +22,7 @@
from testgen.ui.navigation.menu import MenuItem
from testgen.ui.navigation.page import Page
from testgen.ui.navigation.router import Router
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.session import session, temp_value
from testgen.ui.views.dialogs.manage_notifications import NotificationSettingsDialogBase
from testgen.ui.views.dialogs.manage_schedules import ScheduleDialog
@@ -49,7 +50,7 @@ class DataProfilingPage(Page):
def render(self, project_code: str, table_group_id: str | None = None, **_kwargs) -> None:
testgen.page_header(
PAGE_TITLE,
- "data-profiling",
+ "data-profiling/",
)
with st.spinner("Loading data ..."):
@@ -223,11 +224,11 @@ def on_delete_confirmed(*_args) -> None:
ProfilingRun.cancel_run(profiling_run.id)
send_profiling_run_notifications(ProfilingRun.get(profiling_run.id))
ProfilingRun.cascade_delete(profiling_run_ids)
- st.rerun()
+ safe_rerun()
except Exception:
LOG.exception("Failed to delete profiling runs")
set_result({
"success": False,
- "message": "Unable to delete the selected profiling runs, try again.",
+ "message": "Something went wrong while deleting the profiling runs.",
})
- st.rerun(scope="fragment")
+ safe_rerun(scope="fragment")
diff --git a/testgen/ui/views/project_dashboard.py b/testgen/ui/views/project_dashboard.py
index 0fef708e..6425378f 100644
--- a/testgen/ui/views/project_dashboard.py
+++ b/testgen/ui/views/project_dashboard.py
@@ -30,7 +30,7 @@ class ProjectDashboardPage(Page):
def render(self, project_code: str, **_kwargs):
testgen.page_header(
PAGE_TITLE,
- "project-dashboard",
+ "project-dashboard/",
)
with st.spinner("Loading data ..."):
diff --git a/testgen/ui/views/project_settings.py b/testgen/ui/views/project_settings.py
index 08f1af13..c28fc72c 100644
--- a/testgen/ui/views/project_settings.py
+++ b/testgen/ui/views/project_settings.py
@@ -1,9 +1,8 @@
-import time
+import random
import typing
-from functools import partial
+from dataclasses import asdict, dataclass, field
import streamlit as st
-from streamlit.delta_generator import DeltaGenerator
from testgen.commands.run_observability_exporter import test_observability_exporter
from testgen.common.models import with_database_session
@@ -11,7 +10,7 @@
from testgen.ui.components import widgets as testgen
from testgen.ui.navigation.menu import MenuItem
from testgen.ui.navigation.page import Page
-from testgen.ui.session import session
+from testgen.ui.session import session, temp_value
PAGE_TITLE = "Project Settings"
@@ -38,106 +37,62 @@ def render(self, project_code: str | None = None, **_kwargs) -> None:
testgen.page_header(
PAGE_TITLE,
- "manage-projects",
+ "manage-projects/",
)
- testgen.whitespace(1)
- self.show_edit_form()
-
- def show_edit_form(self) -> None:
- form_container = st.container()
- status_container = st.container()
-
- with form_container:
- with testgen.card():
- name_input = st.text_input(
- label="Project Name",
- value=self.project.project_name,
- max_chars=30,
- key="project_settings:keys:project_name",
- )
- st.text_input(
- label="Observability API URL",
- value=self.project.observability_api_url,
- key="project_settings:keys:observability_api_url",
- )
- st.text_input(
- label="Observability API Key",
- value=self.project.observability_api_key,
- key="project_settings:keys:observability_api_key",
- )
-
- testgen.whitespace(1)
- test_button_column, warning_column, save_button_column = st.columns([.4, .3, .3])
- testgen.flex_row_start(test_button_column)
- testgen.flex_row_end(save_button_column)
-
- with test_button_column:
- testgen.button(
- type_="stroked",
- color="basic",
- label="Test Observability Connection",
- width=250,
- on_click=partial(self._display_connection_status, status_container),
- key="project-settings:keys:test-connection",
- )
-
- with warning_column:
- if not name_input:
- testgen.text("Project name is required", "color: var(--red)")
- elif self.existing_names and name_input in self.existing_names:
- testgen.text("Project name in use", "color: var(--red)")
-
- with save_button_column:
- testgen.button(
- type_="flat",
- label="Save",
- width=100,
- on_click=self.edit_project,
- key="project-settings:keys:edit",
- )
+ get_test_results, set_test_results = temp_value(f"project_settings:{project_code}", default=None)
+
+ def on_observability_connection_test(payload: dict) -> None:
+ results = self.test_observability_connection(project_code, payload)
+ set_test_results(asdict(results))
+
+ return testgen.project_settings(
+ key="project_settings",
+ data={
+ "name": self.project.project_name,
+ "observability_api_url": self.project.observability_api_url,
+ "observability_api_key": self.project.observability_api_key,
+ "observability_test_results": get_test_results(),
+ },
+ on_TestObservabilityClicked_change=on_observability_connection_test,
+ on_SaveClicked_change=lambda payload: self.update_project(project_code, payload),
+ )
@with_database_session
- def edit_project(self) -> None:
- edited_project = self._get_edited_project()
- if edited_project["project_name"] and (not self.existing_names or edited_project["project_name"] not in self.existing_names):
- self.project.project_name = edited_project["project_name"]
- self.project.observability_api_url = edited_project["observability_api_url"]
- self.project.observability_api_key = edited_project["observability_api_key"]
- self.project.save()
- st.toast("Changes have been saved.")
-
- def _get_edited_project(self) -> None:
- edited_project = {
- "id": self.project.id,
- "project_code": self.project.project_code,
- }
- # We have to get the input widget values from the session state
- # The return values for st.text_input do not reflect the latest user input if the button is clicked without unfocusing the input
- # https://discuss.streamlit.io/t/issue-with-modifying-text-using-st-text-input-and-st-button/56619/5
- for key in [ "project_name", "observability_api_url", "observability_api_key" ]:
- value = st.session_state.get(f"project_settings:keys:{key}")
- edited_project[key] = value.strip() if value else None
- return edited_project
-
- def _display_connection_status(self, status_container: DeltaGenerator) -> None:
- single_element_container = status_container.empty()
- single_element_container.info("Connecting ...")
-
+ def update_project(self, project_code: str, edited_project: dict) -> None:  # Persist edited project settings; raises ValueError on an invalid or duplicate name
+ existing_names = [
+ p.project_name.lower() for p in Project.select_where(Project.project_code != project_code)  # names of all OTHER projects, lowercased for case-insensitive comparison
+ ]
+ new_project_name = (edited_project.get("name") or "").strip()  # strip like the removed form code did ("value.strip() if value else None")
+ if not new_project_name or new_project_name.lower() in existing_names:  # removed UI code also rejected empty names — keep that invariant
+ raise ValueError(f"Invalid or duplicate project name: {new_project_name!r}")
+
+ self.project.project_name = new_project_name
+ self.project.observability_api_url = edited_project.get("observability_api_url")
+ self.project.observability_api_key = edited_project.get("observability_api_key")
+ self.project.save()
+ Project.clear_cache()  # cached summaries must reflect the new name
+
+ def test_observability_connection(self, project_code: str, edited_project: dict) -> "ObservabilityConnectionStatus":
try:
- project = self._get_edited_project()
test_observability_exporter(
- project["project_code"],
- project["observability_api_url"],
- project["observability_api_key"],
+ project_code,
+ edited_project.get("observability_api_url"),
+ edited_project.get("observability_api_key"),
)
- single_element_container.success("The connection was successful.")
+ return ObservabilityConnectionStatus(successful=True, message="The connection was successful.")
except Exception as e:
- with single_element_container.container():
- st.error("Error attempting the connection.")
- error_message = e.args[0]
- st.caption("Connection Error Details")
- with st.container(border=True):
- st.markdown(error_message)
-
- time.sleep(0.1)
+ error_message = e.args[0]
+ return ObservabilityConnectionStatus(
+ successful=False,
+ message="Error attempting the connection",
+ details=error_message,
+ )
+
+
+@dataclass(frozen=True, slots=True)
+class ObservabilityConnectionStatus:  # Result of a test connection to the Observability API, rendered by the settings component
+ message: str  # short user-facing status message
+ successful: bool  # True when test_observability_exporter completed without raising
+ details: str | None = field(default=None)  # error detail text, set only on failure
+ _: float = field(default_factory=random.random)  # random per-instance value so identical consecutive results still differ — presumably forces the component to refresh (same trick as the "idx" counter elsewhere); confirm
diff --git a/testgen/ui/views/quality_dashboard.py b/testgen/ui/views/quality_dashboard.py
index 4391b6d7..d8460fbc 100644
--- a/testgen/ui/views/quality_dashboard.py
+++ b/testgen/ui/views/quality_dashboard.py
@@ -28,7 +28,7 @@ class QualityDashboardPage(Page):
def render(self, *, project_code: str, **_kwargs) -> None:
project_summary = Project.get_summary(project_code)
- testgen.page_header(PAGE_TITLE, "quality-scores")
+ testgen.page_header(PAGE_TITLE, "quality-scores/")
testgen.testgen_component(
"quality_dashboard",
props={
diff --git a/testgen/ui/views/score_details.py b/testgen/ui/views/score_details.py
index fad8403f..edc8c33c 100644
--- a/testgen/ui/views/score_details.py
+++ b/testgen/ui/views/score_details.py
@@ -23,6 +23,7 @@
ScoreTypes,
SelectedIssue,
)
+from testgen.common.pii_masking import mask_hygiene_detail
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data
from testgen.ui.navigation.page import Page
@@ -63,11 +64,18 @@ def render(
)
return
+ if not session.auth.user_has_project_access(score_definition.project_code):
+ self.router.navigate_with_warning(
+ "You don't have access to view this resource. Redirecting ...",
+ "quality-dashboard",
+ )
+ return
+
session.set_sidebar_project(score_definition.project_code)
testgen.page_header(
"Score Details",
- "view-score-details",
+ "quality-scores/view-score-details/",
breadcrumbs=[
{"path": "quality-dashboard", "label": "Quality Dashboard", "params": {"project_code": score_definition.project_code}},
{"label": score_definition.name},
@@ -101,10 +109,10 @@ def render(
)
score_breakdown = format_score_card_breakdown([item.to_dict() for item in score_breakdown], category)
else:
- issues = format_score_card_issues(
- score_definition.get_score_card_issues(score_type, category, drilldown),
- category,
- )
+ raw_issues = score_definition.get_score_card_issues(score_type, category, drilldown)
+ if not session.auth.user_has_permission("view_pii"):
+ mask_hygiene_detail(raw_issues)
+ issues = format_score_card_issues(raw_issues, category)
testgen.testgen_component(
"score_details",
@@ -127,7 +135,7 @@ def render(
"CategoryChanged": select_category,
"ScoreTypeChanged": select_score_type,
"IssueReportsExported": export_issue_reports,
- "ColumnProflingClicked": lambda payload: profiling_results_dialog(
+ "ColumnProfilingClicked": lambda payload: profiling_results_dialog(
payload["column_name"],
payload["table_name"],
payload["table_group_id"],
@@ -170,15 +178,20 @@ def export_issue_reports(selected_issues: list[SelectedIssue]) -> None:
def get_report_file_data(update_progress, issue) -> FILE_DATA_TYPE:
+ mask_pii = not session.auth.user_has_permission("view_pii")
+ if mask_pii:
+ issue = {**issue}
+ mask_hygiene_detail([issue])
+
with BytesIO() as buffer:
if issue["issue_type"] == "hygiene":
issue_id = issue["id"][:8]
timestamp = pd.Timestamp(issue["profiling_starttime"]).strftime("%Y%m%d_%H%M%S")
- hygiene_issue_report.create_report(buffer, issue)
+ hygiene_issue_report.create_report(buffer, issue, mask_pii=mask_pii)
else:
issue_id = issue["test_result_id"][:8]
timestamp = pd.Timestamp(issue["test_date"]).strftime("%Y%m%d_%H%M%S")
- test_result_report.create_report(buffer, issue)
+ test_result_report.create_report(buffer, issue, mask_pii=mask_pii)
update_progress(1.0)
buffer.seek(0)
@@ -230,7 +243,7 @@ def recalculate_score_history(definition_id: str) -> None:
st.toast("Scorecard trend recalculated", icon=":material/task_alt:")
except:
LOG.exception(f"Failure recalculating history for scorecard id={definition_id}")
- st.toast("Recalculating the trend failed. Try again", icon=":material/error:")
+ st.toast("Something went wrong while recalculating the trend.", icon=":material/error:")
class ScoreDropNotificationSettingsDialog(NotificationSettingsDialogBase):
diff --git a/testgen/ui/views/score_explorer.py b/testgen/ui/views/score_explorer.py
index 48c3385a..1e9352ce 100644
--- a/testgen/ui/views/score_explorer.py
+++ b/testgen/ui/views/score_explorer.py
@@ -23,6 +23,7 @@
SelectedIssue,
)
from testgen.common.models.test_run import TestRun
+from testgen.common.pii_masking import mask_hygiene_detail
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data
from testgen.ui.navigation.page import Page
@@ -34,7 +35,9 @@
get_score_card_issue_reports,
get_score_category_values,
)
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.session import session, temp_value
+from testgen.ui.views.dialogs.profiling_results_dialog import profiling_results_dialog
from testgen.utils import format_score_card, format_score_card_breakdown, format_score_card_issues, try_json
PAGE_PATH = "quality-dashboard:explorer"
@@ -74,10 +77,18 @@ def render(
return
project_code = original_score_definition.project_code
+
+ if not session.auth.user_has_project_access(project_code):
+ self.router.navigate_with_warning(
+ "You don't have access to view this resource. Redirecting ...",
+ "quality-dashboard",
+ )
+ return
+
page_title = "Edit Scorecard"
last_breadcrumb = original_score_definition.name
- testgen.page_header(page_title, "explore-and-create-scorecards", breadcrumbs=[
+ testgen.page_header(page_title, "quality-scores/explore-and-create-scorecards/", breadcrumbs=[
{"path": "quality-dashboard", "label": "Quality Dashboard", "params": {"project_code": project_code}},
{"label": last_breadcrumb},
])
@@ -145,10 +156,10 @@ def render(
breakdown_category,
)
if drilldown:
- issues = format_score_card_issues(
- score_definition.get_score_card_issues(breakdown_score_type, breakdown_category, drilldown),
- breakdown_category,
- )
+ raw_issues = score_definition.get_score_card_issues(breakdown_score_type, breakdown_category, drilldown)
+ if not session.auth.user_has_permission("view_pii"):
+ mask_hygiene_detail(raw_issues)
+ issues = format_score_card_issues(raw_issues, breakdown_category)
score_definition_dict = score_definition.to_dict()
testgen.testgen_component(
@@ -173,6 +184,11 @@ def render(
"ScoreTypeChanged": set_breakdown_score_type,
"DrilldownChanged": set_breakdown_drilldown,
"IssueReportsExported": export_issue_reports,
+ "ColumnProfilingClicked": lambda payload: profiling_results_dialog(
+ payload["column_name"],
+ payload["table_name"],
+ payload["table_group_id"],
+ ),
"ScoreDefinitionSaved": save_score_definition,
"ColumnSelectorOpened": partial(column_selector_dialog, project_code, score_definition_dict),
"FilterModeChanged": change_score_definition_filter_mode,
@@ -231,15 +247,20 @@ def export_issue_reports(selected_issues: list[SelectedIssue]) -> None:
def get_report_file_data(update_progress, issue) -> FILE_DATA_TYPE:
+ mask_pii = not session.auth.user_has_permission("view_pii")
+ if mask_pii:
+ issue = {**issue}
+ mask_hygiene_detail([issue])
+
with BytesIO() as buffer:
if issue["issue_type"] == "hygiene":
issue_id = issue["id"][:8]
timestamp = pd.Timestamp(issue["profiling_starttime"]).strftime("%Y%m%d_%H%M%S")
- hygiene_issue_report.create_report(buffer, issue)
+ hygiene_issue_report.create_report(buffer, issue, mask_pii=mask_pii)
else:
issue_id = issue["test_result_id"][:8]
timestamp = pd.Timestamp(issue["test_date"]).strftime("%Y%m%d_%H%M%S")
- test_result_report.create_report(buffer, issue)
+ test_result_report.create_report(buffer, issue, mask_pii=mask_pii)
update_progress(1.0)
buffer.seek(0)
@@ -253,7 +274,7 @@ def column_selector_dialog(project_code: str, score_definition_dict: dict, _) ->
def dialog_content() -> None:
if not is_column_selector_opened():
- st.rerun()
+ safe_rerun()
selected_filters = set()
if score_definition_dict.get("filter_by_columns"):
diff --git a/testgen/ui/views/table_groups.py b/testgen/ui/views/table_groups.py
index 9f81a2f5..058cc975 100644
--- a/testgen/ui/views/table_groups.py
+++ b/testgen/ui/views/table_groups.py
@@ -9,7 +9,7 @@
from testgen.commands.run_profiling import run_profiling_in_background
from testgen.commands.test_generation import run_monitor_generation
-from testgen.common.models import with_database_session
+from testgen.common.models import get_current_session, with_database_session
from testgen.common.models.connection import Connection
from testgen.common.models.project import Project
from testgen.common.models.scheduler import RUN_MONITORS_JOB_KEY, RUN_TESTS_JOB_KEY, JobSchedule
@@ -19,6 +19,7 @@
from testgen.ui.navigation.menu import MenuItem
from testgen.ui.navigation.page import Page
from testgen.ui.queries import table_group_queries
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.session import session, temp_value
from testgen.ui.utils import get_cron_sample_handler
from testgen.ui.views.connections import FLAVOR_OPTIONS, format_connection
@@ -49,7 +50,7 @@ def render(
table_group_name: str | None = None,
**_kwargs,
) -> None:
- testgen.page_header(PAGE_TITLE, "manage-table-groups")
+ testgen.page_header(PAGE_TITLE, "connect-your-database/manage-table-groups/")
user_can_edit = session.auth.user_has_permission("edit")
project_summary = Project.get_summary(project_code)
@@ -165,7 +166,7 @@ def on_close_clicked(_params: dict) -> None:
get_close_dialog, set_close_dialog = temp_value("table_groups:close:new", default=False)
if (get_close_dialog()):
- st.rerun()
+ safe_rerun()
should_preview, mark_for_preview = temp_value("table_groups:preview:new", default=False)
should_verify_access, mark_for_access_preview = temp_value("table_groups:preview_access:new", default=False)
@@ -308,6 +309,8 @@ def on_close_clicked(_params: dict) -> None:
predict_holiday_codes=monitor_test_suite_data.get("predict_holiday_codes") or None,
)
monitor_test_suite.save()
+ # Commit needed to make test suite visible to run_monitor_generation's separate DB connection
+ get_current_session().commit()
run_monitor_generation(monitor_test_suite.id, ["Volume_Trend", "Schema_Drift"])
JobSchedule(
@@ -334,7 +337,11 @@ def on_close_clicked(_params: dict) -> None:
message = "Profiling run encountered errors"
LOG.exception(message)
+ if table_group_id and success:
+ safe_rerun()
+
except IntegrityError:
+ get_current_session().rollback()
success = False
message = "A Table Group with the same name already exists."
else:
@@ -348,6 +355,9 @@ def on_close_clicked(_params: dict) -> None:
"connections": connections,
"table_group": table_group.to_dict(json_safe=True),
"is_in_use": is_table_group_used,
+ "permissions": {
+ "can_view_pii": session.auth.user_has_permission("view_pii"),
+ },
"table_group_preview": table_group_preview,
"steps": steps,
"results": {
@@ -418,7 +428,7 @@ def on_delete_confirmed(*_args):
if not TableGroup.has_running_process([table_group_id]):
TableGroup.cascade_delete([table_group_id])
message = f"Table Group {table_group.table_groups_name} has been deleted. "
- st.rerun()
+ safe_rerun()
else:
message = "This Table Group is in use by a running process and cannot be deleted."
result = {"success": success, "message": message}
diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py
index c494deed..af1b4b43 100644
--- a/testgen/ui/views/test_definitions.py
+++ b/testgen/ui/views/test_definitions.py
@@ -7,7 +7,8 @@
import pandas as pd
import streamlit as st
-from sqlalchemy import and_, asc, desc, func, or_, tuple_
+from sqlalchemy import and_, asc, case, desc, func, or_, tuple_
+from sqlalchemy import select as sa_select
from streamlit.delta_generator import DeltaGenerator
from streamlit_extras.no_default_selectbox import selectbox
@@ -17,7 +18,7 @@
from testgen.common.models import with_database_session
from testgen.common.models.connection import Connection
from testgen.common.models.table_group import TableGroup, TableGroupMinimal
-from testgen.common.models.test_definition import TestDefinition, TestDefinitionMinimal, TestDefinitionSummary
+from testgen.common.models.test_definition import TestDefinition, TestDefinitionMinimal, TestDefinitionNote
from testgen.common.models.test_suite import TestSuite, TestSuiteMinimal
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets.download_dialog import (
@@ -29,10 +30,12 @@
from testgen.ui.components.widgets.page import css_class, flex_row_end
from testgen.ui.navigation.page import Page
from testgen.ui.services.database_service import fetch_all_from_db, fetch_df_from_db, fetch_from_target_db
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.services.string_service import empty_if_null, snake_case_to_title_case
from testgen.ui.session import session, temp_value
from testgen.ui.views.dialogs.profiling_results_dialog import view_profiling_button
from testgen.ui.views.dialogs.run_tests_dialog import run_tests_dialog
+from testgen.ui.views.dialogs.test_definition_notes_dialog import test_definition_notes_dialog
from testgen.utils import to_dataframe
LOG = logging.getLogger("testgen")
@@ -51,6 +54,7 @@ def render(
table_name: str | None = None,
column_name: str | None = None,
test_type: str | None = None,
+ flagged: str | None = None,
**_kwargs,
) -> None:
test_suite = TestSuite.get(test_suite_id)
@@ -62,20 +66,28 @@ def render(
table_group = TableGroup.get_minimal(test_suite.table_groups_id)
project_code = table_group.project_code
+
+ if not session.auth.user_has_project_access(project_code):
+ self.router.navigate_with_warning(
+ "You don't have access to view this resource. Redirecting ...",
+ "test-suites",
+ )
+ return
+
session.set_sidebar_project(project_code)
user_can_edit = session.auth.user_has_permission("edit")
user_can_disposition = session.auth.user_has_permission("disposition")
testgen.page_header(
"Test Definitions",
- "test-definitions",
+ "generate-tests/test-definitions/",
breadcrumbs=[
{ "label": "Test Suites", "path": "test-suites", "params": { "project_code": project_code } },
{ "label": test_suite.test_suite },
],
)
- table_filter_column, column_filter_column, test_filter_column, sort_column, table_actions_column = st.columns([.2, .2, .2, .1, .25], vertical_alignment="bottom")
+ table_filter_column, column_filter_column, test_filter_column, flagged_filter_column, sort_column, table_actions_column = st.columns([.2, .2, .15, .1, .1, .25], vertical_alignment="bottom")
testgen.flex_row_end(table_actions_column)
actions_column, disposition_column = st.columns([.5, .5])
@@ -83,7 +95,7 @@ def render(
testgen.flex_row_end(disposition_column)
filters_changed = False
- current_filters = (table_name, column_name, test_type)
+ current_filters = (table_name, column_name, test_type, flagged)
if (query_filters := st.session_state.get("test_definitions:filters")) != current_filters:
if query_filters:
filters_changed = True
@@ -124,13 +136,23 @@ def render(
label="Test Type",
)
+ with flagged_filter_column:
+ flagged = testgen.select(
+ options=["Flagged", "Not Flagged"],
+ default_value=flagged,
+ bind_to_query="flagged",
+ label="Flagged",
+ )
+
with sort_column:
sortable_columns = (
+ ("Flagged", "flagged"),
+ ("Has Notes", "notes_count"),
("Table", "table_name"),
("Column", "column_name"),
("Test Type", "test_type"),
)
- default = [(sortable_columns[i][1], "ASC") for i in (0, 1, 2)]
+ default = [(sortable_columns[i][1], "ASC") for i in (2, 3, 4)]
sorting_columns = testgen.sorting_selector(sortable_columns, default)
if user_can_disposition:
@@ -152,7 +174,11 @@ def render(
with st.container():
with st.spinner("Loading data ..."):
- df = get_test_definitions(test_suite, table_name, column_name, test_type, sorting_columns)
+ df = get_test_definitions(test_suite, table_name, column_name, test_type, sorting_columns, flagged)
+
+ if df.empty:
+ st.info("No test definitions found.")
+ return
selected, selected_test_def = render_grid(df, multi_select, filters_changed)
@@ -193,6 +219,11 @@ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
{ "icon": "🔐", "help": "Unlock for future test generation", "attribute": "lock_refresh", "value": False, "message": "Unlocked" },
])
+ disposition_actions.extend([
+ { "icon": "🚩", "help": "Flag for attention", "attribute": "flagged", "value": True, "message": "Flagged" },
+ { "icon": "⌀", "help": "Clear flag", "attribute": "flagged", "value": False, "message": "Flag cleared" },
+ ])
+
for action in disposition_actions:
action_disabled = not selected or all(sel[action["attribute"]] == action["value"] for sel in selected)
action["button"] = disposition_column.button(action["icon"], help=action["help"], disabled=action_disabled)
@@ -207,10 +238,19 @@ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
fm.reset_post_updates(
update_test_definition(selected, action["attribute"], action["value"], action["message"]),
as_toast=True,
- clear_cache=True,
- lst_cached_functions=[],
)
+ if actions_column.button(
+ ":material/sticky_note_2: Notes",
+ disabled=not selected or len(selected) != 1,
+ help="View and add notes for this test definition",
+ ):
+ row = selected[0]
+ test_definition_notes_dialog(
+ str(row["id"]),
+ {"table": row["table_name"], "column": row["column_name"], "test": row["test_name_short"]},
+ )
+
if user_can_edit:
if actions_column.button(
":material/edit: Edit",
@@ -241,6 +281,8 @@ def render_grid(df: pd.DataFrame, multi_select: bool, filters_changed: bool) ->
"test_name_short",
"test_active_display",
"lock_refresh_display",
+ "flagged_display",
+ "notes_display",
"urgency",
"export_to_observability_display",
"profiling_as_of_date",
@@ -258,6 +300,8 @@ def render_grid(df: pd.DataFrame, multi_select: bool, filters_changed: bool) ->
"Test Type",
"Active",
"Locked",
+ "Flagged",
+ "Notes",
"Urgency",
"Export to Observabilty",
"Based on Profiling",
@@ -282,6 +326,7 @@ def render_selected_details(selected_test: dict, table_group: TableGroupMinimal)
"test_active_display",
"test_definition_status",
"lock_refresh_display",
+ "flagged_display",
"urgency",
"export_to_observability",
]
@@ -294,6 +339,7 @@ def render_selected_details(selected_test: dict, table_group: TableGroupMinimal)
"test_active",
"test_definition_status",
"lock_refresh",
+ "flagged",
"urgency",
"export_to_observability",
]
@@ -352,7 +398,7 @@ def delete_test_dialog(test_definitions: list[dict]):
TestDefinition.delete_where(TestDefinition.id.in_([ item["id"] for item in test_definitions ]))
st.success("Test definitions have been deleted.")
time.sleep(1)
- st.rerun()
+ safe_rerun()
def show_test_form_by_id(test_definition_id):
@@ -426,6 +472,7 @@ def show_test_form(
skip_errors = selected_test_def["skip_errors"] or 0 if mode == "edit" else 0
test_active = bool(selected_test_def["test_active"]) if mode == "edit" else True
lock_refresh = bool(selected_test_def["lock_refresh"]) if mode == "edit" else False
+ test_flagged = bool(selected_test_def["flagged"]) if mode == "edit" else False
test_definition_status = selected_test_def["test_definition_status"] if mode == "edit" else ""
column_name = empty_if_null(selected_test_def["column_name"]) if mode == "edit" else empty_if_null(column_name)
last_auto_gen_date = empty_if_null(selected_test_def["last_auto_gen_date"]) if mode == "edit" else ""
@@ -531,6 +578,7 @@ def show_test_form(
help="Protects test parameters from being overwritten when tests in this Test Suite are regenerated.",
),
"test_active": left_column.toggle(label="Test Active", value=test_active),
+ "flagged": left_column.toggle(label="Flagged", value=test_flagged, help="Flag this test for attention."),
"custom_query": custom_query,
"baseline_ct": baseline_ct,
"baseline_unique_ct": baseline_unique_ct,
@@ -600,7 +648,7 @@ def show_test_form(
with container:
testgen.link(
href="profiling-runs:results",
- params={"run_id": str(profile_run_id)},
+ params={"run_id": str(profile_run_id), "project_code": table_group.project_code},
label=formatted_time,
open_new=True,
)
@@ -618,7 +666,7 @@ def show_test_form(
st.divider()
- has_match_attributes = any(attribute.startswith("match_") for attribute in dynamic_attributes)
+ has_match_attributes = "match_schema_name" in dynamic_attributes or "match_table_name" in dynamic_attributes
left_column, right_column = st.columns([0.5, 0.5]) if has_match_attributes else (st.container(), None)
test_definition["schema_name"] = left_column.text_input(
@@ -740,14 +788,14 @@ def render_dynamic_attribute(attribute: str, container: DeltaGenerator):
value = None
placeholder = "Max"
disabled = True
-
+
if test_definition.get("history_calculation") == "Value" and (
"history_calculation_upper" not in dynamic_attributes
or test_definition.get("history_calculation_upper") == "Value"
):
value = 1
disabled = True
-
+
test_definition[attribute] = container.number_input(
label=label_text,
step=1,
@@ -872,8 +920,7 @@ def render_dynamic_attribute(attribute: str, container: DeltaGenerator):
if mode == "edit":
test_definition["id"] = selected_test_def["id"]
TestDefinition(**test_definition).save()
- get_test_suite_columns.clear()
- st.rerun()
+ safe_rerun()
@st.dialog(title="Add Test")
@@ -950,12 +997,15 @@ def copy_move_test_dialog(
movable_test_definitions = []
if target_table_group_id and target_test_suite_id:
- collision_test_definitions = get_test_definitions_collision(selected_test_definitions, target_table_group_id, target_test_suite_id)
+ collision_test_definitions = get_test_definitions_collision(selected_test_definitions, target_table_group_id, target_test_suite_id, target_table_name, target_column_name)
+ overwrite_ids = []
if not collision_test_definitions.empty:
unlocked = collision_test_definitions[collision_test_definitions["lock_refresh"] == False]
locked = collision_test_definitions[collision_test_definitions["lock_refresh"] == True]
locked_tuples = [ (test["table_name"], test["column_name"], test["test_type"]) for test in locked.iterrows() ]
movable_test_definitions = [ test for test in selected_test_definitions if (test["table_name"], test["column_name"], test["test_type"]) not in locked_tuples ]
+ selected_ids = {str(item["id"]) for item in selected_test_definitions}
+ overwrite_ids = [id_ for id_ in unlocked["id"].tolist() if str(id_) not in selected_ids]
warning_message = f"""Auto-generated tests are present in the target test suite for the same column-test type combinations as the selected tests.
\nUnlocked tests that will be overwritten: {len(unlocked)}
@@ -981,19 +1031,21 @@ def copy_move_test_dialog(
test_definition_ids = [item["id"] for item in movable_test_definitions]
if move:
+ if overwrite_ids:
+ TestDefinition.delete_where(TestDefinition.id.in_(overwrite_ids))
TestDefinition.move(test_definition_ids, target_table_group_id, target_test_suite_id, target_table_name, target_column_name)
success_message = "Test Definitions have been moved."
st.success(success_message)
- get_test_suite_columns.clear()
time.sleep(1)
- st.rerun()
+ safe_rerun()
elif copy:
+ if overwrite_ids:
+ TestDefinition.delete_where(TestDefinition.id.in_(overwrite_ids))
TestDefinition.copy(test_definition_ids, target_table_group_id, target_test_suite_id, target_table_name, target_column_name)
success_message = "Test Definitions have been copied."
st.success(success_message)
- get_test_suite_columns.clear()
time.sleep(1)
- st.rerun()
+ safe_rerun()
def validate_form(test_scope, test_definition, column_name_label):
if test_scope in ["column", "referential", "custom"] and not test_definition["column_name"]:
@@ -1052,7 +1104,7 @@ def confirm_unlocking_test_definition(test_definitions: list[dict]):
if unlock_confirmed():
update_test_definition(test_definitions, "lock_refresh", False, "Test definitions have been unlocked.")
time.sleep(1)
- st.rerun()
+ safe_rerun()
_, button_column = st.columns([.85, .15])
with button_column:
@@ -1085,7 +1137,7 @@ def get_excel_report_data(
else:
data = get_test_definitions(test_suite)
- for key in ["test_active_display", "lock_refresh_display"]:
+ for key in ["test_active_display", "lock_refresh_display", "flagged_display"]:
data[key] = data[key].apply(lambda val: val if val == "Yes" else None)
for key in ["profiling_as_of_date", "last_manual_update"]:
@@ -1102,6 +1154,7 @@ def get_excel_report_data(
"export_uom": {"header": "Unit of measure"},
"test_active_display": {"header": "Active"},
"lock_refresh_display": {"header": "Locked"},
+ "flagged_display": {"header": "Flagged"},
"urgency": {"header": "Severity"},
"profiling_as_of_date": {"header": "From profiling as-of (UTC)"},
"last_manual_update": {"header": "Last manual update (UTC)"},
@@ -1222,6 +1275,7 @@ def get_test_definitions(
column_name: str | None = None,
test_type: str | None = None,
sorting_columns: list[str] | None = None,
+ flagged_filter: str | None = None,
) -> pd.DataFrame:
clauses = [TestDefinition.test_suite_id == test_suite.id]
if table_name:
@@ -1230,23 +1284,56 @@ def get_test_definitions(
clauses.append(TestDefinition.column_name.ilike(column_name))
if test_type:
clauses.append(TestDefinition.test_type == test_type)
+ if flagged_filter == "Flagged":
+ clauses.append(TestDefinition.flagged == True)
+ elif flagged_filter == "Not Flagged":
+ clauses.append(TestDefinition.flagged == False)
sort_funcs = {"ASC": asc, "DESC": desc}
+
+ notes_count_expr = (
+ sa_select(func.count(TestDefinitionNote.id))
+ .where(TestDefinitionNote.test_definition_id == TestDefinition.id)
+ .correlate(TestDefinition)
+ .scalar_subquery()
+ )
+
+ sort_expressions = {
+ "flagged": lambda d: sort_funcs[d](case((TestDefinition.flagged == True, 0), else_=1)),
+ "notes_count": lambda d: sort_funcs[d](case((notes_count_expr > 0, 0), else_=1)),
+ }
+
+ order_by = []
+ if sorting_columns:
+ for (attribute, direction) in sorting_columns:
+ if attribute in sort_expressions:
+ order_by.append(sort_expressions[attribute](direction))
+ else:
+ order_by.append(sort_funcs[direction](func.lower(getattr(TestDefinition, attribute))))
+
test_definitions = TestDefinition.select_where(
*clauses,
- order_by=tuple([
- sort_funcs[direction](func.lower(getattr(TestDefinition, attribute)))
- for (attribute, direction) in sorting_columns
- ]) if sorting_columns else None,
+ order_by=tuple(order_by) if order_by else None,
)
- df = to_dataframe(test_definitions, TestDefinitionSummary.columns())
+ df = to_dataframe(test_definitions)
+ if df.empty:
+ return df
+
date_service.accommodate_dataframe_to_timezone(df, st.session_state)
for key in ["id", "table_groups_id", "profile_run_id", "test_suite_id"]:
df[key] = df[key].apply(lambda value: str(value))
df["test_active_display"] = df["test_active"].apply(lambda value: "Yes" if value else "No")
df["lock_refresh_display"] = df["lock_refresh"].apply(lambda value: "Yes" if value else "No")
+ df["flagged_display"] = df["flagged"].apply(lambda value: "Yes" if value else "No")
+
+ if not df.empty:
+ notes_counts = TestDefinitionNote.get_notes_count_by_ids([str(td_id) for td_id in df["id"]])
+ df["notes_count"] = df["id"].map(notes_counts).fillna(0).astype(int)
+ else:
+ df["notes_count"] = pd.Series(dtype=int)
+ df["notes_display"] = df["notes_count"].apply(lambda x: f"📝 {x}" if x > 0 else "")
df["urgency"] = df.apply(lambda row: row["severity"] or test_suite.severity or row["default_severity"], axis=1)
df["final_test_description"] = df.apply(lambda row: row["test_description"] or row["default_test_description"], axis=1)
df["export_uom"] = df.apply(lambda row: row["measure_uom_description"] or row["measure_uom"], axis=1)
@@ -1267,9 +1354,11 @@ def get_test_definitions_collision(
test_definitions: list[dict],
target_table_group_id: str,
target_test_suite_id: str,
+ target_table_name: str | None = None,
+ target_column_name: str | None = None,
) -> pd.DataFrame:
- table_tests = [(item["table_name"], item["test_type"]) for item in test_definitions if item["column_name"] is None and item["table_name"] is not None]
- column_tests = [(item["table_name"], item["column_name"], item["test_type"]) for item in test_definitions if item["column_name"] is not None]
+ table_tests = [(target_table_name or item["table_name"], item["test_type"]) for item in test_definitions if item["column_name"] is None and item["table_name"] is not None]
+ column_tests = [(target_table_name or item["table_name"], target_column_name or item["column_name"], item["test_type"]) for item in test_definitions if item["column_name"] is not None]
results = TestDefinition.select_minimal_where(
TestDefinition.table_groups_id == target_table_group_id,
TestDefinition.test_suite_id == target_test_suite_id,
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index f6065121..ff8a1188 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -41,7 +41,8 @@
from testgen.ui.services.database_service import execute_db_query, fetch_df_from_db, fetch_one_from_db
from testgen.ui.services.string_service import snake_case_to_title_case
from testgen.ui.session import session
-from testgen.ui.views.dialogs.profiling_results_dialog import view_profiling_button
+from testgen.ui.views.dialogs.profiling_results_dialog import profiling_results_dialog
+from testgen.ui.views.dialogs.test_definition_notes_dialog import test_definition_notes_dialog
from testgen.ui.views.test_definitions import show_test_form_by_id
from testgen.utils import friendly_score, str_to_timestamp
@@ -63,6 +64,7 @@ def render(
column_name: str | None = None,
test_type: str | None = None,
action: str | None = None,
+ flagged: str | None = None,
**_kwargs,
) -> None:
run = TestRun.get_minimal(run_id)
@@ -73,28 +75,34 @@ def render(
)
return
+ if not session.auth.user_has_project_access(run.project_code):
+ self.router.navigate_with_warning(
+ "You don't have access to view this resource. Redirecting ...",
+ "test-runs",
+ )
+ return
+
run_date = date_service.get_timezoned_timestamp(st.session_state, run.test_starttime)
session.set_sidebar_project(run.project_code)
testgen.page_header(
"Test Results",
- "investigate-test-results",
+ "data-quality-testing/investigate-test-results/",
breadcrumbs=[
{ "label": "Test Runs", "path": "test-runs", "params": { "project_code": run.project_code } },
{ "label": f"{run.test_suite} | {run_date}" },
],
)
- summary_column, score_column, actions_column, export_button_column = st.columns([.3, .15, .3, .15], vertical_alignment="bottom")
- status_filter_column, table_filter_column, column_filter_column, test_type_filter_column, action_filter_column, sort_column = st.columns(
- [.175, .2, .2, .175, .15, .1], vertical_alignment="bottom"
+ summary_column, score_column, export_button_column = st.columns([.35, .15, .5], vertical_alignment="bottom")
+ status_filter_column, table_filter_column, column_filter_column, test_type_filter_column, flagged_filter_column, action_filter_column, sort_column = st.columns(
+ [.15, .175, .175, .15, .1, .15, .1], vertical_alignment="bottom"
)
- testgen.flex_row_end(actions_column, wrap=True)
testgen.flex_row_end(export_button_column)
filters_changed = False
- current_filters = (status, table_name, column_name, test_type, action)
+ current_filters = (status, table_name, column_name, test_type, flagged, action)
if (query_filters := st.session_state.get("test_results:filters")) != current_filters:
if query_filters:
filters_changed = True
@@ -157,6 +165,14 @@ def render(
label="Test Type",
)
+ with flagged_filter_column:
+ flagged = testgen.select(
+ options=["Flagged", "Not Flagged"],
+ default_value=flagged,
+ bind_to_query="flagged",
+ label="Flagged",
+ )
+
with action_filter_column:
action = testgen.select(
options=["✓ Confirmed", "✘ Dismissed", "🔇 Muted", "↩︎ No Action"],
@@ -168,6 +184,8 @@ def render(
with sort_column:
sortable_columns = (
+ ("Flagged", "CASE WHEN td.flagged THEN 0 ELSE 1 END"),
+ ("Has Notes", "CASE WHEN (SELECT COUNT(*) FROM test_definition_notes tdn WHERE tdn.test_definition_id = td.id) > 0 THEN 0 ELSE 1 END"),
("Table", "LOWER(r.table_name)"),
("Columns/Focus", "LOWER(r.column_names)"),
("Test Type", "r.test_type"),
@@ -176,10 +194,16 @@ def render(
("Status", "result_status"),
("Action", "r.disposition"),
)
- default = [(sortable_columns[i][1], "ASC") for i in (0, 1, 2)]
+ default = [(sortable_columns[i][1], "ASC") for i in (2, 3, 4)]
sorting_columns = testgen.sorting_selector(sortable_columns, default)
- with actions_column:
+ actions_column, disposition_column = st.columns([.5, .5])
+ testgen.flex_row_start(actions_column)
+ testgen.flex_row_end(disposition_column)
+
+ user_can_edit = session.auth.user_has_permission("edit")
+
+ with disposition_column:
multi_select = st.toggle(
"Multi-Select",
help="Toggle on to perform actions on multiple results",
@@ -196,8 +220,9 @@ def render(
with st.container():
with st.spinner("Loading data ..."):
# Retrieve test results (always cached, action as null)
+ flagged_bool = True if flagged == "Flagged" else False if flagged == "Not Flagged" else None
df = test_result_queries.get_test_results(
- run_id, status, test_type, table_name, column_name, action, sorting_columns
+ run_id, status, test_type, table_name, column_name, action, sorting_columns, flagged_bool
)
# Retrieve disposition action (cache refreshed)
df_action = get_test_disposition(run_id)
@@ -209,6 +234,18 @@ def render(
action_map = df_action.set_index("id")["action"].to_dict()
df["action"] = df["test_result_id"].map(action_map).fillna(df["action"])
+ def build_review_column(row):
+ parts = []
+ if row["action"]:
+ parts.append(row["action"])
+ if row["flagged"]:
+ parts.append("🚩")
+ if row.get("notes_count", 0) > 0:
+ parts.append(f"📝 {row['notes_count']}")
+ return " · ".join(parts)
+
+ df["review"] = df.apply(build_review_column, axis=1) if not df.empty else ""
+
test_suite = TestSuite.get_minimal(run.test_suite_id)
table_group = TableGroup.get_minimal(test_suite.table_groups_id)
@@ -221,7 +258,7 @@ def render(
"result_measure",
"measure_uom",
"result_status",
- "action",
+ "review",
"result_message",
],
[
@@ -231,13 +268,14 @@ def render(
"Result Measure",
"Unit of Measure",
"Status",
- "Action",
+ "Review",
"Details",
],
id_column="test_result_id",
selection_mode="multiple" if multi_select else "single",
reset_pagination=filters_changed,
bind_to_query=True,
+ column_styles={"review": {"textAlign": "center", "fontSize": "1.1em"}},
)
popover_container = export_button_column.empty()
@@ -268,7 +306,90 @@ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
)
# Need to render toolbar buttons after grid, so selection status is maintained
- affected_cached_functions = [get_test_disposition, test_result_queries.get_test_results]
+ # === Action buttons (left side, near the grid) ===
+
+ if actions_column.button(
+ ":material/sticky_note_2: Notes",
+ disabled=not selected or len(selected) != 1,
+ help="View and add notes for this test definition",
+ ):
+ row = selected[0]
+ test_definition_notes_dialog(
+ str(row["test_definition_id"]),
+ {"table": row["table_name"], "column": row["column_names"], "test": row["test_name_short"]},
+ )
+
+ if actions_column.button(
+ ":material/edit: Edit Test",
+ disabled=not selected_row or not user_can_edit,
+ help="Edit the Test Definition",
+ ):
+ show_test_form_by_id(selected_row["test_definition_id"])
+
+ if actions_column.button(
+ ":material/visibility: Source Data",
+ disabled=not selected_row,
+ help="View current source data for highlighted result",
+ ):
+ MixpanelService().send_event(
+ "view-source-data",
+ page=PAGE_PATH,
+ test_type=selected_row["test_name_short"],
+ )
+ source_data_dialog(selected_row)
+
+ can_view_profiling = (
+ selected_row
+ and selected_row.get("test_scope") == "column"
+ and selected_row.get("column_names") not in (None, "(multi-column)", "N/A")
+ and selected_row.get("table_name") not in (None, "(multi-table)")
+ )
+ if actions_column.button(
+ ":material/insert_chart: Profiling",
+ disabled=not can_view_profiling,
+ help="View profiling for highlighted column",
+ ):
+ profiling_results_dialog(
+ selected_row["column_names"],
+ selected_row["table_name"],
+ selected_row["table_groups_id"],
+ )
+
+ report_eligible_rows = [
+ row for row in selected
+ if row["result_status"] != "Passed" and row["disposition"] in (None, "Confirmed")
+ ] if selected else []
+ report_btn_help = (
+ "Generate PDF reports for the selected results that are not muted or dismissed and are not Passed"
+ if multi_select
+ else "Generate PDF report for selected result"
+ )
+ if actions_column.button(
+ ":material/download: Issue Report",
+ disabled=not report_eligible_rows,
+ help=report_btn_help,
+ ):
+ MixpanelService().send_event(
+ "download-issue-report",
+ page=PAGE_PATH,
+ issue_count=len(report_eligible_rows),
+ )
+ dialog_title = "Download Issue Report"
+ if len(report_eligible_rows) == 1:
+ download_dialog(
+ dialog_title=dialog_title,
+ file_content_func=get_report_file_data,
+ args=(report_eligible_rows[0],),
+ )
+ else:
+ zip_func = zip_multi_file_data(
+ "testgen_test_issue_reports.zip",
+ get_report_file_data,
+ [(arg,) for arg in selected],
+ )
+ download_dialog(dialog_title=dialog_title, file_content_func=zip_func)
+
+ # === Disposition buttons (right side) ===
disposition_actions = [
{ "icon": "✓", "help": "Confirm this issue as relevant for this run", "status": "Confirmed" },
@@ -286,7 +407,7 @@ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
or sel["result_status"] == "Passed"
for sel in selected
)
- action["button"] = actions_column.button(action["icon"], help=action["help"], disabled=disable_dispo)
+ action["button"] = disposition_column.button(action["icon"], help=action["help"], disabled=disable_dispo)
# This has to be done as a second loop - otherwise, the rest of the buttons after the clicked one are not displayed briefly while refreshing
for action in disposition_actions:
@@ -294,8 +415,24 @@ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
fm.reset_post_updates(
do_disposition_update(selected, action["status"]),
as_toast=True,
- clear_cache=True,
- lst_cached_functions=affected_cached_functions,
+ )
+
+ if session.auth.user_has_permission("disposition"):
+ flag_actions = [
+ { "icon": "🚩", "help": "Flag test for attention", "value": True, "message": "Flagged" },
+ { "icon": "⌀", "help": "Clear flag", "value": False, "message": "Flag cleared" },
+ ]
+ for flag_action in flag_actions:
+ flag_disabled = not selected or all(sel["flagged"] == flag_action["value"] for sel in selected)
+ flag_action["button"] = disposition_column.button(flag_action["icon"], help=flag_action["help"], disabled=flag_disabled)
+
+ for flag_action in flag_actions:
+ if flag_action["button"]:
+ test_definition_ids = list({row["test_definition_id"] for row in selected})
+ TestDefinition.set_status_attribute("flagged", test_definition_ids, flag_action["value"])
+ fm.reset_post_updates(
+ None,
+ as_toast=True,
)
# Needs to be after all data loading/updating
@@ -303,14 +440,8 @@ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
with score_column:
render_score(run.project_code, run_id)
- if selected:
- render_selected_details(
- selected,
- selected_row,
- test_suite,
- session.auth.user_has_permission("edit"),
- multi_select,
- )
+ if selected_row:
+ render_selected_details(selected_row, test_suite)
@st.fragment
@@ -438,11 +569,11 @@ def get_test_result_summary(test_run_id: str) -> list[dict]:
def show_test_def_detail(test_definition_id: str, test_suite: TestSuiteMinimal):
def readable_boolean(v: bool):
return "Yes" if v else "No"
-
+
if not test_definition_id:
st.warning("Test definition no longer exists.")
return
-
+
test_definition = TestDefinition.get(test_definition_id)
if test_definition:
@@ -506,109 +637,34 @@ def readable_boolean(v: bool):
@with_database_session
def render_selected_details(
- selected_rows: list[dict],
selected_item: dict,
test_suite: TestSuiteMinimal,
- user_can_edit: bool,
- multi_select: bool = False,
) -> None:
- if not selected_rows:
- st.markdown(":orange[Select a record to see more information.]")
- else:
- pg_col1, pg_col2 = st.columns([0.5, 0.5])
-
- with pg_col2:
- v_col1, v_col2, v_col3, v_col4 = st.columns([.25, .25, .25, .25])
-
- if selected_item:
- dfh = test_result_queries.get_test_result_history(selected_item)
- show_hist_columns = ["test_date", "threshold_value", "result_measure", "result_status"]
-
- time_columns = ["test_date"]
- date_service.accommodate_dataframe_to_timezone(dfh, st.session_state, time_columns)
-
- if user_can_edit:
- view_edit_test(v_col1, selected_item["test_definition_id"])
-
- if selected_item["test_scope"] == "column":
- with v_col2:
- view_profiling_button(
- selected_item["column_names"],
- selected_item["table_name"],
- selected_item["table_groups_id"],
- )
-
- with v_col3:
- if st.button(
- ":material/visibility: Source Data", help="View current source data for highlighted result",
- use_container_width=True
- ):
- MixpanelService().send_event(
- "view-source-data",
- page=PAGE_PATH,
- test_type=selected_item["test_name_short"],
- )
- source_data_dialog(selected_item)
-
- with v_col4:
-
- report_eligible_rows = [
- row for row in selected_rows
- if row["result_status"] != "Passed" and row["disposition"] in (None, "Confirmed")
- ]
-
- if multi_select:
- report_btn_help = (
- "Generate PDF reports for the selected results that are not muted or dismissed and are not Passed"
- )
+ dfh = test_result_queries.get_test_result_history(selected_item)
+ show_hist_columns = ["test_date", "threshold_value", "result_measure", "result_status"]
+
+ time_columns = ["test_date"]
+ date_service.accommodate_dataframe_to_timezone(dfh, st.session_state, time_columns)
+
+ pg_col1, pg_col2 = st.columns([0.5, 0.5])
+
+ with pg_col1:
+ fm.show_subheader(selected_item["test_name_short"])
+ st.markdown(f"###### {selected_item['test_description']}")
+ if selected_item["measure_uom_description"]:
+ st.caption(selected_item["measure_uom_description"])
+ if selected_item["result_message"]:
+ st.caption(selected_item["result_message"].replace("*", "\\*"))
+ fm.render_grid_select(dfh, show_hist_columns, selection_mode="disabled", key="test_history")
+ with pg_col2:
+ ut_tab1, ut_tab2 = st.tabs(["History", "Test Definition"])
+ with ut_tab1:
+ if dfh.empty:
+ st.write("Test history not available.")
else:
- report_btn_help = "Generate PDF report for selected result"
-
- if st.button(
- ":material/download: Issue Report",
- use_container_width=True,
- disabled=not report_eligible_rows,
- help=report_btn_help,
- ):
- MixpanelService().send_event(
- "download-issue-report",
- page=PAGE_PATH,
- issue_count=len(report_eligible_rows),
- )
- dialog_title = "Download Issue Report"
- if len(report_eligible_rows) == 1:
- download_dialog(
- dialog_title=dialog_title,
- file_content_func=get_report_file_data,
- args=(report_eligible_rows[0],),
- )
- else:
- zip_func = zip_multi_file_data(
- "testgen_test_issue_reports.zip",
- get_report_file_data,
- [(arg,) for arg in selected_rows],
- )
- download_dialog(dialog_title=dialog_title, file_content_func=zip_func)
-
- if selected_item:
- with pg_col1:
- fm.show_subheader(selected_item["test_name_short"])
- st.markdown(f"###### {selected_item['test_description']}")
- if selected_item["measure_uom_description"]:
- st.caption(selected_item["measure_uom_description"])
- if selected_item["result_message"]:
- st.caption(selected_item["result_message"].replace("*", "\\*"))
- fm.render_grid_select(dfh, show_hist_columns, selection_mode="disabled", key="test_history")
- with pg_col2:
- ut_tab1, ut_tab2 = st.tabs(["History", "Test Definition"])
- with ut_tab1:
- if dfh.empty:
- st.write("Test history not available.")
- else:
- # write_history_graph(dfh)
- write_history_chart_v2(dfh)
- with ut_tab2:
- show_test_def_detail(selected_item["test_definition_id"], test_suite)
+ write_history_chart_v2(dfh)
+ with ut_tab2:
+ show_test_def_detail(selected_item["test_definition_id"], test_suite)
@with_database_session
@@ -637,6 +693,7 @@ def get_excel_report_data(
"result_status": {"header": "Status"},
"result_message": {"header": "Message"},
"action": {},
+ "flagged_display": {"header": "Flagged"},
}
return get_excel_file_data(
data,
@@ -814,7 +871,7 @@ def source_data_dialog(selected_row):
st.markdown(f"#### {selected_row['test_name_short']}")
st.caption(selected_row["test_description"])
-
+
st.markdown("#### Test Parameters")
testgen.caption(selected_row["input_parameters"], styles="max-height: 75px; overflow: auto;")
@@ -822,17 +879,18 @@ def source_data_dialog(selected_row):
st.markdown("#### Result Detail")
st.caption(selected_row["result_message"].replace("*", "\\*"))
+ mask_pii = not session.auth.user_has_permission("view_pii")
with st.spinner("Retrieving source data..."):
if selected_row["test_type"] == "CUSTOM":
- bad_data_status, bad_data_msg, _, df_bad = get_test_issue_source_data_custom(selected_row, limit=500)
+ bad_data_status, bad_data_msg, _, df_bad = get_test_issue_source_data_custom(selected_row, limit=500, mask_pii=mask_pii)
else:
- bad_data_status, bad_data_msg, _, df_bad = get_test_issue_source_data(selected_row, limit=500)
+ bad_data_status, bad_data_msg, _, df_bad = get_test_issue_source_data(selected_row, limit=500, mask_pii=mask_pii)
if bad_data_status in {"ND", "NA"}:
st.info(bad_data_msg)
elif bad_data_status == "ERR":
st.error(bad_data_msg)
elif df_bad is None:
- st.error("An unknown error was encountered.")
+ st.error("Something went wrong while loading the data.")
else:
if bad_data_msg:
st.info(bad_data_msg)
@@ -853,20 +911,13 @@ def source_data_dialog(selected_row):
st.code(query, language="sql", wrap_lines=True, height=100)
-def view_edit_test(button_container, test_definition_id):
- if test_definition_id:
- with button_container:
- if st.button(":material/edit: Edit Test", help="Edit the Test Definition", use_container_width=True):
- show_test_form_by_id(test_definition_id)
-
-
def get_report_file_data(update_progress, tr_data) -> FILE_DATA_TYPE:
tr_id = tr_data["test_result_id"][:8]
tr_time = pd.Timestamp(tr_data["test_date"]).strftime("%Y%m%d_%H%M%S")
file_name = f"testgen_test_issue_report_{tr_id}_{tr_time}.pdf"
with BytesIO() as buffer:
- create_report(buffer, tr_data)
+ create_report(buffer, tr_data, mask_pii=not session.auth.user_has_permission("view_pii"))
update_progress(1.0)
buffer.seek(0)
return file_name, "application/pdf", buffer.read()
diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py
index ea30c3fd..919651c7 100644
--- a/testgen/ui/views/test_runs.py
+++ b/testgen/ui/views/test_runs.py
@@ -24,6 +24,7 @@
from testgen.ui.navigation.menu import MenuItem
from testgen.ui.navigation.page import Page
from testgen.ui.navigation.router import Router
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.session import session, temp_value
from testgen.ui.views.dialogs.manage_notifications import NotificationSettingsDialogBase
from testgen.ui.views.dialogs.manage_schedules import ScheduleDialog
@@ -51,7 +52,7 @@ class TestRunsPage(Page):
def render(self, project_code: str, table_group_id: str | None = None, test_suite_id: str | None = None, **_kwargs) -> None:
testgen.page_header(
PAGE_TITLE,
- "data-quality-testing",
+ "data-quality-testing/",
)
with st.spinner("Loading data ..."):
@@ -242,8 +243,8 @@ def on_delete_confirmed(*_args) -> None:
TestRun.cancel_run(test_run.test_run_id)
send_test_run_notifications(TestRun.get(test_run.test_run_id))
TestRun.cascade_delete(test_run_ids)
- st.rerun()
+ safe_rerun()
except Exception:
LOG.exception("Failed to delete test run")
- result = {"success": False, "message": "Unable to delete the test run, try again."}
- st.rerun(scope="fragment")
+ result = {"success": False, "message": "Something went wrong while deleting the test run."}
+ safe_rerun(scope="fragment")
diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py
index 033972b5..a6d31834 100644
--- a/testgen/ui/views/test_suites.py
+++ b/testgen/ui/views/test_suites.py
@@ -14,6 +14,7 @@
from testgen.ui.navigation.menu import MenuItem
from testgen.ui.navigation.page import Page
from testgen.ui.navigation.router import Router
+from testgen.ui.services.rerun_service import safe_rerun
from testgen.ui.services.string_service import empty_if_null
from testgen.ui.session import session
from testgen.ui.views.dialogs.generate_tests_dialog import generate_tests_dialog
@@ -37,15 +38,15 @@ class TestSuitesPage(Page):
order=2,
)
- def render(self, project_code: str, table_group_id: str | None = None, **_kwargs) -> None:
+ def render(self, project_code: str, table_group_id: str | None = None, test_suite_name: str | None = None, **_kwargs) -> None:
testgen.page_header(
PAGE_TITLE,
- "manage-test-suites",
+ "connect-your-database/manage-test-suites/",
)
table_groups = TableGroup.select_minimal_where(TableGroup.project_code == project_code)
user_can_edit = session.auth.user_has_permission("edit")
- test_suites = TestSuite.select_summary(project_code, table_group_id)
+ test_suites = TestSuite.select_summary(project_code, table_group_id, test_suite_name)
project_summary = Project.get_summary(project_code)
testgen.testgen_component(
@@ -60,6 +61,7 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs
"selected": str(table_group_id) == str(table_group.id),
} for table_group in table_groups
],
+ "test_suite_name": test_suite_name,
"permissions": {
"can_edit": user_can_edit,
}
@@ -78,8 +80,8 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs
)
-def on_test_suites_filtered(table_group_id: str | None = None) -> None:
- Router().set_query_params({ "table_group_id": table_group_id })
+def on_test_suites_filtered(params: dict) -> None:
+ Router().set_query_params(params)
@st.dialog(title="Add Test Suite")
@@ -206,7 +208,7 @@ def show_test_suite(mode, project_code, table_groups: Iterable[TableGroupMinimal
)
st.success(success_message)
time.sleep(1)
- st.rerun()
+ safe_rerun()
@st.dialog(title="Delete Test Suite")
@@ -245,7 +247,7 @@ def delete_test_suite_dialog(test_suite_id: str) -> None:
success_message = f"Test Suite {test_suite_name} has been deleted. "
st.success(success_message)
time.sleep(1)
- st.rerun()
+ safe_rerun()
@st.dialog(title="Export to Observability")
diff --git a/testgen/utils/plugins.py b/testgen/utils/plugins.py
index 15bb024d..1863d03e 100644
--- a/testgen/utils/plugins.py
+++ b/testgen/utils/plugins.py
@@ -1,4 +1,7 @@
+from __future__ import annotations
+
import dataclasses
+import importlib
import importlib.metadata
import inspect
import json
@@ -6,7 +9,8 @@
import shutil
from collections.abc import Generator
from pathlib import Path
-from typing import ClassVar
+from types import ModuleType
+from typing import ClassVar, get_args
from testgen.ui.assets import get_asset_path
from testgen.ui.auth import Authentication
@@ -20,7 +24,7 @@
ui_plugins_entrypoint_prefix = "./plugin_pages"
-def discover() -> Generator["Plugin", None, None]:
+def discover() -> Generator[Plugin, None, None]:
ui_plugins_provision_file.touch(exist_ok=True)
for package_path, distribution_names in importlib.metadata.packages_distributions().items():
if package_path.startswith(PLUGIN_PREFIX):
@@ -98,42 +102,95 @@ def _read_ui_plugin_spec() -> dict:
return json.loads(contents.replace("export default ", "")[:-1])
+class RBACProvider:
+ """Base RBAC provider. OS default: all permissions granted."""
+
+ @staticmethod
+ def check_permission(_user: object, _permission: str) -> bool:
+ return True
+
+ @staticmethod
+ def get_roles_with_permission(_permission: str) -> list[str]:
+ """Return roles that have the given permission. OS default: all roles."""
+ from testgen.common.models.project_membership import RoleType
+
+ return list(get_args(RoleType))
+
+
class PluginSpec:
+ rbac: ClassVar[type[RBACProvider]] = RBACProvider
auth: ClassVar[type[Authentication] | None] = None
- page: ClassVar[type[Page] | None] = None
+ pages: ClassVar[list[type[Page]]] = []
logo: ClassVar[type[Logo] | None] = None
component: ClassVar[ComponentSpec | None] = None
+ @classmethod
+ def configure_ui(cls) -> None:
+ """Populate UI-related class attributes (pages, auth, logo, component).
+
+ Override this in plugins to defer Streamlit-dependent imports until Streamlit
+ is actually running. Called by ``Plugin.load_streamlit()``, never by ``Plugin.load()``.
+ """
+
+
+class PluginHook:
+ """Singleton holding resolved plugin values, pre-loaded with defaults."""
+
+ _instance: PluginHook | None = None
+ rbac: type[RBACProvider] = RBACProvider
+
+ @classmethod
+ def instance(cls) -> PluginHook:
+ if cls._instance is None:
+ cls._instance = cls()
+ return cls._instance
+
+
+def _find_plugin_spec(module: ModuleType) -> type[PluginSpec] | None:
+ """Find the first concrete PluginSpec subclass in a module."""
+ for name in dir(module):
+ cls = getattr(module, name, None)
+ if inspect.isclass(cls) and issubclass(cls, PluginSpec) and cls is not PluginSpec:
+ return cls
+ return None
+
@dataclasses.dataclass
class Plugin:
package: str
version: str
- def load(self) -> PluginSpec:
- plugin_page = None
- plugin_auth = None
- plugin_logo = None
- plugin_component_spec = None
-
+ def load(self) -> type[PluginSpec]:
+ """Lightweight load: import plugin module and populate PluginHook."""
module = importlib.import_module(self.package)
- for property_name in dir(module):
- if ((maybe_class := getattr(module, property_name, None)) and inspect.isclass(maybe_class)):
- if issubclass(maybe_class, PluginSpec) and maybe_class != PluginSpec:
- return maybe_class
-
- if issubclass(maybe_class, Page):
- plugin_page = maybe_class
-
- elif issubclass(maybe_class, Authentication):
- plugin_auth = maybe_class
-
- elif issubclass(maybe_class, Logo):
- plugin_logo = maybe_class
-
- return type("AnyPlugin", (PluginSpec,), {
- "page": plugin_page,
- "auth": plugin_auth,
- "logo": plugin_logo,
- "component": plugin_component_spec,
- })
+ spec = _find_plugin_spec(module)
+ if spec is not None:
+ hook = PluginHook.instance()
+ if spec.rbac is not RBACProvider:
+ hook.rbac = spec.rbac
+ return spec or PluginSpec
+
+ def load_streamlit(self) -> type[PluginSpec]:
+ """Full Streamlit load. Calls load() first, then configure_ui() for UI attributes."""
+ spec = self.load()
+ spec.configure_ui()
+ if spec is not PluginSpec:
+ return spec
+
+ # Fallback: discover UI classes from module (backward compat for plugins without explicit PluginSpec)
+ _discoverable: dict[type, str] = {Page: "page", Authentication: "auth", Logo: "logo"}
+ attrs: dict[str, type] = {}
+ module = importlib.import_module(self.package)
+
+ for name in dir(module):
+ cls = getattr(module, name, None)
+ if not inspect.isclass(cls):
+ continue
+ for base, attr in _discoverable.items():
+ if issubclass(cls, base) and cls is not base:
+ if attr == "page":
+ attrs.setdefault("pages", []).append(cls)
+ else:
+ attrs[attr] = cls
+
+ return type("AnyPlugin", (PluginSpec,), attrs) if attrs else PluginSpec
diff --git a/tests/unit/commands/queries/test_execute_tests_query.py b/tests/unit/commands/queries/test_execute_tests_query.py
new file mode 100644
index 00000000..fe51fccc
--- /dev/null
+++ b/tests/unit/commands/queries/test_execute_tests_query.py
@@ -0,0 +1,360 @@
+from datetime import UTC, datetime
+from uuid import uuid4
+
+import pytest
+
+from testgen.commands.queries.execute_tests_query import (
+ TestExecutionDef,
+ build_cat_expressions,
+ group_cat_tests,
+ parse_cat_results,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def _make_td(**overrides) -> TestExecutionDef:
+ """Build a minimal TestExecutionDef with sensible defaults."""
+ defaults = {
+ "id": uuid4(),
+ "test_type": "Alpha",
+ "schema_name": "public",
+ "table_name": "orders",
+ "column_name": "amount",
+ "skip_errors": 0,
+ "history_calculation": "NONE",
+ "custom_query": "",
+ "prediction": None,
+ "run_type": "CAT",
+ "test_scope": "column",
+ "template": "",
+ "measure": "COUNT(*)",
+ "test_operator": ">=",
+ "test_condition": "100",
+ "baseline_ct": "",
+ "baseline_unique_ct": "",
+ "baseline_value": "",
+ "baseline_value_ct": "",
+ "threshold_value": "",
+ "baseline_sum": "",
+ "baseline_avg": "",
+ "baseline_sd": "",
+ "lower_tolerance": "",
+ "upper_tolerance": "",
+ "subset_condition": "",
+ "groupby_names": "",
+ "having_condition": "",
+ "window_date_column": "",
+ "window_days": "",
+ "match_schema_name": "",
+ "match_table_name": "",
+ "match_column_names": "",
+ "match_subset_condition": "",
+ "match_groupby_names": "",
+ "match_having_condition": "",
+ }
+ defaults.update(overrides)
+ return TestExecutionDef(**defaults)
+
+
+def _make_input_params_fn():
+ return lambda td: f"params_for_{td.test_type}"
+
+
+# --- build_cat_expressions ---
+
+
+def test_build_basic_measure_with_coalesce_cast():
+ measure_expr, _ = build_cat_expressions(
+ measure="COUNT(*)",
+ test_operator=">=",
+ test_condition="100",
+ history_calculation="NONE",
+ lower_tolerance="10",
+ upper_tolerance="200",
+ varchar_type="VARCHAR",
+ concat_operator="||",
+ )
+ assert "COALESCE(CAST(COUNT(*) AS VARCHAR)" in measure_expr
+ assert "||" in measure_expr
+ assert "'|'" in measure_expr
+ assert "|" in measure_expr
+
+
+def test_build_normal_pass_fail_condition():
+ _, cond_expr = build_cat_expressions(
+ measure="COUNT(*)",
+ test_operator=">=",
+ test_condition="100",
+ history_calculation="NONE",
+ lower_tolerance="10",
+ upper_tolerance="200",
+ varchar_type="VARCHAR",
+ concat_operator="||",
+ )
+ assert "CASE WHEN" in cond_expr
+ assert "COUNT(*)>=100" in cond_expr
+ assert "THEN '0,'" in cond_expr
+ assert "ELSE '1,'" in cond_expr
+
+
+def test_build_between_operator_spacing():
+ _, cond_expr = build_cat_expressions(
+ measure="AVG(price)",
+ test_operator=" BETWEEN ",
+ test_condition="10 AND 200",
+ history_calculation="NONE",
+ lower_tolerance="10",
+ upper_tolerance="200",
+ varchar_type="VARCHAR",
+ concat_operator="||",
+ )
+ # BETWEEN branch uses f"{measure} {operator} {condition}" — double spaces expected
+ # since operator already includes spaces
+ assert "AVG(price) BETWEEN 10 AND 200" in cond_expr
+
+
+def test_build_non_between_operator_no_spacing():
+ _, cond_expr = build_cat_expressions(
+ measure="COUNT(*)",
+ test_operator="<=",
+ test_condition="500",
+ history_calculation="NONE",
+ lower_tolerance="10",
+ upper_tolerance="200",
+ varchar_type="VARCHAR",
+ concat_operator="||",
+ )
+ assert "COUNT(*)<=500" in cond_expr
+
+
+def test_build_prediction_mode_training():
+ """PREDICT mode without tolerances should return -1 (training)."""
+ _, cond_expr = build_cat_expressions(
+ measure="COUNT(*)",
+ test_operator=">=",
+ test_condition="100",
+ history_calculation="PREDICT",
+ lower_tolerance="",
+ upper_tolerance="",
+ varchar_type="VARCHAR",
+ concat_operator="||",
+ )
+ assert cond_expr == "'-1,'"
+
+
+def test_build_prediction_mode_with_tolerances():
+ """PREDICT mode with tolerances should produce normal condition."""
+ _, cond_expr = build_cat_expressions(
+ measure="COUNT(*)",
+ test_operator=">=",
+ test_condition="100",
+ history_calculation="PREDICT",
+ lower_tolerance="50",
+ upper_tolerance="200",
+ varchar_type="VARCHAR",
+ concat_operator="||",
+ )
+ assert "CASE WHEN" in cond_expr
+
+
+def test_build_prediction_partial_tolerance_is_training():
+ """PREDICT with only lower tolerance set should still be training mode."""
+ _, cond_expr = build_cat_expressions(
+ measure="COUNT(*)",
+ test_operator=">=",
+ test_condition="100",
+ history_calculation="PREDICT",
+ lower_tolerance="50",
+ upper_tolerance="",
+ varchar_type="VARCHAR",
+ concat_operator="||",
+ )
+ assert cond_expr == "'-1,'"
+
+
+def test_build_prediction_zero_tolerance_is_not_training():
+ """PREDICT with tolerance of 0 should produce normal condition, not training mode."""
+ _, cond_expr = build_cat_expressions(
+ measure="COUNT(*)",
+ test_operator=">=",
+ test_condition="100",
+ history_calculation="PREDICT",
+ lower_tolerance=0,
+ upper_tolerance=0,
+ varchar_type="VARCHAR",
+ concat_operator="||",
+ )
+ assert "CASE WHEN" in cond_expr
+
+
+def test_build_prediction_zero_lower_tolerance_is_not_training():
+ """PREDICT with lower_tolerance=0 and a valid upper should produce normal condition."""
+ _, cond_expr = build_cat_expressions(
+ measure="COUNT(*)",
+ test_operator=">=",
+ test_condition="100",
+ history_calculation="PREDICT",
+ lower_tolerance=0,
+ upper_tolerance="200",
+ varchar_type="VARCHAR",
+ concat_operator="||",
+ )
+ assert "CASE WHEN" in cond_expr
+
+
+def test_build_prediction_none_tolerance_is_training():
+ """PREDICT with None tolerances should return training mode."""
+ _, cond_expr = build_cat_expressions(
+ measure="COUNT(*)",
+ test_operator=">=",
+ test_condition="100",
+ history_calculation="PREDICT",
+ lower_tolerance=None,
+ upper_tolerance=None,
+ varchar_type="VARCHAR",
+ concat_operator="||",
+ )
+ assert cond_expr == "'-1,'"
+
+
+def test_build_custom_null_value():
+ measure_expr, _ = build_cat_expressions(
+ measure="COUNT(*)",
+ test_operator=">=",
+ test_condition="100",
+ history_calculation="NONE",
+ lower_tolerance="",
+ upper_tolerance="",
+ varchar_type="VARCHAR",
+ concat_operator="||",
+ null_value="MISSING",
+ )
+ assert "'MISSING|'" in measure_expr
+
+
+# --- group_cat_tests ---
+
+
+def test_group_single_mode():
+ tds = [_make_td(measure_expression="m1", condition_expression="c1"),
+ _make_td(measure_expression="m2", condition_expression="c2")]
+ groups = group_cat_tests(tds, max_query_chars=10000, concat_operator="||", single=True)
+ assert len(groups) == 2
+ assert len(groups[0]) == 1
+ assert len(groups[1]) == 1
+
+
+def test_group_all_fit_in_one():
+ tds = [_make_td(measure_expression="m1", condition_expression="c1"),
+ _make_td(measure_expression="m2", condition_expression="c2")]
+ groups = group_cat_tests(tds, max_query_chars=10000, concat_operator="||")
+ assert len(groups) == 1
+ assert len(groups[0]) == 2
+
+
+def test_group_character_overflow_splits():
+ # Each td takes len("m"*50) + len("c"*50) + 2*len("||") = 104 chars
+ tds = [_make_td(measure_expression="m" * 50, condition_expression="c" * 50) for _ in range(3)]
+ # max_query_chars = 250 fits 2 tds (208 <= 250), third overflows (312 > 250)
+ groups = group_cat_tests(tds, max_query_chars=250, concat_operator="||")
+ assert len(groups) == 2
+ assert len(groups[0]) == 2
+ assert len(groups[1]) == 1
+
+
+def test_group_different_tables_separate():
+ td1 = _make_td(schema_name="public", table_name="orders",
+ measure_expression="m1", condition_expression="c1")
+ td2 = _make_td(schema_name="public", table_name="customers",
+ measure_expression="m2", condition_expression="c2")
+ groups = group_cat_tests([td1, td2], max_query_chars=10000, concat_operator="||")
+ assert len(groups) == 2
+
+
+def test_group_empty_input():
+ groups = group_cat_tests([], max_query_chars=10000, concat_operator="||")
+ assert groups == []
+
+
+def test_group_same_table_together():
+ tds = [_make_td(schema_name="s", table_name="t",
+ measure_expression="m", condition_expression="c") for _ in range(5)]
+ groups = group_cat_tests(tds, max_query_chars=10000, concat_operator="||")
+ assert len(groups) == 1
+ assert len(groups[0]) == 5
+
+
+# --- parse_cat_results ---
+
+
+def test_parse_basic_single_result():
+ td = _make_td(test_type="Alpha")
+ test_defs = [[td]]
+ results = [{"query_index": 0, "result_measures": "42|", "result_codes": "1,"}]
+ run_id = uuid4()
+ suite_id = uuid4()
+ start = datetime.now(UTC)
+
+ rows = parse_cat_results(results, test_defs, run_id, suite_id, start,
+ _make_input_params_fn())
+ assert len(rows) == 1
+ row = rows[0]
+ assert row[0] == run_id
+ assert row[1] == suite_id
+ assert row[2] == start
+ assert row[3] == td.id
+ assert row[10] == "1" # result_code
+ assert row[13] == "42" # result_measure
+
+
+def test_parse_null_value_handling():
+ td = _make_td()
+ test_defs = [[td]]
+ results = [{"query_index": 0, "result_measures": "|", "result_codes": "0,"}]
+
+ rows = parse_cat_results(results, test_defs, uuid4(), uuid4(),
+ datetime.now(UTC), _make_input_params_fn())
+ assert rows[0][13] is None # should become None
+
+
+def test_parse_multi_test_per_query():
+ td1 = _make_td(test_type="Alpha")
+ td2 = _make_td(test_type="Beta")
+ test_defs = [[td1, td2]]
+ results = [{"query_index": 0, "result_measures": "10|20|", "result_codes": "1,0,"}]
+
+ rows = parse_cat_results(results, test_defs, uuid4(), uuid4(),
+ datetime.now(UTC), _make_input_params_fn())
+ assert len(rows) == 2
+ assert rows[0][13] == "10"
+ assert rows[1][13] == "20"
+ assert rows[0][10] == "1"
+ assert rows[1][10] == "0"
+
+
+def test_parse_multiple_queries():
+ td1 = _make_td(test_type="Alpha")
+ td2 = _make_td(test_type="Beta")
+ test_defs = [[td1], [td2]]
+ results = [
+ {"query_index": 0, "result_measures": "10|", "result_codes": "1,"},
+ {"query_index": 1, "result_measures": "20|", "result_codes": "0,"},
+ ]
+
+ rows = parse_cat_results(results, test_defs, uuid4(), uuid4(),
+ datetime.now(UTC), _make_input_params_fn())
+ assert len(rows) == 2
+ assert rows[0][4] == "Alpha"
+ assert rows[1][4] == "Beta"
+
+
+def test_parse_result_code_negative_one():
+ """Training mode result (-1) should pass through."""
+ td = _make_td()
+ test_defs = [[td]]
+ results = [{"query_index": 0, "result_measures": "42|", "result_codes": "-1,"}]
+
+ rows = parse_cat_results(results, test_defs, uuid4(), uuid4(),
+ datetime.now(UTC), _make_input_params_fn())
+ assert rows[0][10] == "-1"
diff --git a/tests/unit/commands/queries/test_profiling_query.py b/tests/unit/commands/queries/test_profiling_query.py
new file mode 100644
index 00000000..61ad7df0
--- /dev/null
+++ b/tests/unit/commands/queries/test_profiling_query.py
@@ -0,0 +1,93 @@
+import pytest
+
+from testgen.commands.queries.profiling_query import calculate_sampling_params
+
+pytestmark = pytest.mark.unit
+
+
+# --- calculate_sampling_params ---
+
+
+def test_sampling_basic_calculation():
+ result = calculate_sampling_params("orders", 10000, "30", min_sample=100)
+ assert result is not None
+ assert result.table_name == "orders"
+ assert result.sample_count == 3000
+ assert result.sample_ratio == pytest.approx(10000 / 3000)
+ assert result.sample_percent == pytest.approx(30.0)
+
+
+def test_sampling_non_numeric_percent_fallback():
+ """Non-numeric string should fall back to 30%."""
+ result = calculate_sampling_params("orders", 10000, "abc", min_sample=100)
+ assert result is not None
+ assert result.sample_count == 3000
+
+
+def test_sampling_empty_string_percent_fallback():
+ result = calculate_sampling_params("orders", 10000, "", min_sample=100)
+ assert result is not None
+ assert result.sample_count == 3000
+
+
+def test_sampling_none_percent_fallback():
+ result = calculate_sampling_params("orders", 10000, None, min_sample=100)
+ assert result is not None
+ assert result.sample_count == 3000
+
+
+def test_sampling_percent_out_of_range_zero():
+ result = calculate_sampling_params("orders", 10000, "0", min_sample=100)
+ assert result is None
+
+
+def test_sampling_percent_out_of_range_100():
+ result = calculate_sampling_params("orders", 10000, "100", min_sample=100)
+ assert result is None
+
+
+def test_sampling_record_count_below_min_sample():
+ result = calculate_sampling_params("small_table", 50, "30", min_sample=100)
+ assert result is None
+
+
+def test_sampling_record_count_equals_min_sample():
+ result = calculate_sampling_params("small_table", 100, "30", min_sample=100)
+ assert result is None
+
+
+def test_sampling_clamped_to_min_sample():
+ """When calculated sample is below min_sample, clamp up to min_sample."""
+ result = calculate_sampling_params("orders", 1000, "5", min_sample=200)
+ # 5% of 1000 = 50, but min_sample is 200
+ assert result is not None
+ assert result.sample_count == 200
+
+
+def test_sampling_clamped_to_max_sample():
+ """When calculated sample exceeds max, clamp down to max."""
+ result = calculate_sampling_params("huge_table", 10_000_000, "50", min_sample=100, max_sample=999000)
+ # 50% of 10M = 5M, but max is 999000
+ assert result is not None
+ assert result.sample_count == 999000
+
+
+def test_sampling_ratio_and_percent_math():
+ result = calculate_sampling_params("orders", 5000, "20", min_sample=100)
+ # 20% of 5000 = 1000
+ assert result.sample_count == 1000
+ assert result.sample_ratio == pytest.approx(5.0)
+ assert result.sample_percent == pytest.approx(20.0)
+
+
+def test_sampling_float_percent():
+ result = calculate_sampling_params("orders", 10000, 25.5, min_sample=100)
+ # 25.5% of 10000 = 2550
+ assert result is not None
+ assert result.sample_count == 2550
+
+
+def test_sampling_decimal_string_percent():
+ result = calculate_sampling_params("orders", 10000, "15.5", min_sample=100)
+ assert result is not None
+ assert result.sample_count == 1550
diff --git a/tests/unit/commands/test_run_test_validation.py b/tests/unit/commands/test_run_test_validation.py
new file mode 100644
index 00000000..6336a86c
--- /dev/null
+++ b/tests/unit/commands/test_run_test_validation.py
@@ -0,0 +1,249 @@
+from uuid import uuid4
+
+import pytest
+
+from testgen.commands.queries.execute_tests_query import TestExecutionDef
+from testgen.commands.run_test_validation import check_identifiers, collect_test_identifiers
+
+pytestmark = pytest.mark.unit
+
+
+def _make_td(**overrides) -> TestExecutionDef:
+ """Build a minimal TestExecutionDef with sensible defaults."""
+ defaults = {
+ "id": uuid4(),
+ "test_type": "Alpha",
+ "schema_name": "public",
+ "table_name": "orders",
+ "column_name": "amount",
+ "skip_errors": 0,
+ "history_calculation": "NONE",
+ "custom_query": "",
+ "prediction": None,
+ "run_type": "CAT",
+ "test_scope": "column",
+ "template": "",
+ "measure": "",
+ "test_operator": "=",
+ "test_condition": "",
+ "baseline_ct": "",
+ "baseline_unique_ct": "",
+ "baseline_value": "",
+ "baseline_value_ct": "",
+ "threshold_value": "",
+ "baseline_sum": "",
+ "baseline_avg": "",
+ "baseline_sd": "",
+ "lower_tolerance": "",
+ "upper_tolerance": "",
+ "subset_condition": "",
+ "groupby_names": "",
+ "having_condition": "",
+ "window_date_column": "",
+ "window_days": "",
+ "match_schema_name": "",
+ "match_table_name": "",
+ "match_column_names": "",
+ "match_subset_condition": "",
+ "match_groupby_names": "",
+ "match_having_condition": "",
+ }
+ defaults.update(overrides)
+ return TestExecutionDef(**defaults)
+
+
+# --- collect_test_identifiers ---
+
+
+def test_collect_custom_type_skipped():
+ td = _make_td(test_type="CUSTOM")
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ assert len(identifiers) == 0
+ assert len(schemas) == 0
+ assert len(errors) == 0
+
+
+def test_collect_tablegroup_scope_skipped():
+ td = _make_td(test_scope="tablegroup")
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ assert len(identifiers) == 0
+
+
+def test_collect_table_scope_collects_table_only():
+ td = _make_td(test_scope="table", column_name="irrelevant")
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ # Should have table-level identifier (column=None), not column-level
+ assert (td.schema_name.lower(), td.table_name.lower(), None) in identifiers
+
+
+def test_collect_column_scope_single_column():
+ td = _make_td(test_scope="column", column_name="amount")
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ assert ("public", "orders", "amount") in identifiers
+
+
+def test_collect_column_scope_multi_column():
+ """Multi-column scope (not single_column) should split on commas."""
+ td = _make_td(test_scope="referential", column_name="col_a,col_b", match_schema_name="", match_table_name="")
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ assert ("public", "orders", "col_a") in identifiers
+ assert ("public", "orders", "col_b") in identifiers
+
+
+def test_collect_quoted_multi_column_parsing():
+ """Columns with quoted identifiers should be parsed correctly."""
+ td = _make_td(test_scope="referential", column_name='"col,a","col_b"', match_schema_name="", match_table_name="")
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ assert ("public", "orders", "col,a") in identifiers
+ assert ("public", "orders", "col_b") in identifiers
+
+
+def test_collect_groupby_names():
+ td = _make_td(groupby_names="region,country")
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ assert ("public", "orders", "region") in identifiers
+ assert ("public", "orders", "country") in identifiers
+
+
+def test_collect_referential_window_date_column():
+ td = _make_td(
+ test_scope="referential",
+ column_name="col_a",
+ window_date_column="created_at",
+ match_schema_name="public",
+ match_table_name="customers",
+ match_column_names="cust_id",
+ )
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ assert ("public", "orders", "created_at") in identifiers
+
+
+def test_collect_referential_match_columns():
+ td = _make_td(
+ test_scope="referential",
+ column_name="order_id",
+ match_schema_name="public",
+ match_table_name="customers",
+ match_column_names="cust_id",
+ )
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ assert ("public", "customers", "cust_id") in identifiers
+
+
+def test_collect_referential_match_groupby():
+ td = _make_td(
+ test_scope="referential",
+ column_name="order_id",
+ match_schema_name="public",
+ match_table_name="customers",
+ match_column_names="",
+ match_groupby_names="region",
+ )
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ assert ("public", "customers", "region") in identifiers
+
+
+def test_collect_referential_missing_match_schema_errors():
+ td = _make_td(
+ test_scope="referential",
+ column_name="order_id",
+ match_schema_name="",
+ match_table_name="",
+ match_column_names="cust_id",
+ )
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ assert td.id in errors
+ assert any("match schema" in e for e in errors[td.id])
+
+
+def test_collect_missing_schema_or_table_errors():
+ td = _make_td(schema_name="", table_name="")
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ assert td.id in errors
+ assert any("schema, table, or column not defined" in e for e in errors[td.id])
+
+
+def test_collect_aggregate_type_validates_table_only():
+ td = _make_td(test_type="Aggregate_Balance", test_scope="referential",
+ column_name="amount", match_schema_name="public",
+ match_table_name="customers", match_column_names="balance")
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ # Table-level check for main table
+ assert ("public", "orders", None) in identifiers
+ # Match columns should NOT be checked for Aggregate_ types
+ assert ("public", "customers", "balance") not in identifiers
+
+
+def test_collect_target_schemas_populated():
+ td1 = _make_td(schema_name="schema_a")
+ td2 = _make_td(schema_name="schema_b")
+ identifiers, schemas, errors = collect_test_identifiers([td1, td2], '"')
+ assert "schema_a" in schemas
+ assert "schema_b" in schemas
+
+
+def test_collect_error_format_starts_with_deactivated():
+ td = _make_td(schema_name="", table_name="")
+ identifiers, schemas, errors = collect_test_identifiers([td], '"')
+ assert errors[td.id][0] == "Deactivated"
+
+
+# --- check_identifiers ---
+
+
+def test_check_all_identifiers_present():
+ test_id = uuid4()
+ identifiers = {("public", "orders", "amount"): {test_id}}
+ tables = {("public", "orders")}
+ columns = {("public", "orders", "amount")}
+ errors = check_identifiers(identifiers, tables, columns)
+ assert len(errors) == 0
+
+
+def test_check_missing_table():
+ test_id = uuid4()
+ identifiers = {("public", "orders", None): {test_id}}
+ tables = set() # No tables exist
+ columns = set()
+ errors = check_identifiers(identifiers, tables, columns)
+ assert test_id in errors
+ assert any("Missing table" in e for e in errors[test_id])
+
+
+def test_check_missing_column():
+ test_id = uuid4()
+ identifiers = {("public", "orders", "nonexistent"): {test_id}}
+ tables = {("public", "orders")}
+ columns = {("public", "orders", "amount")} # different column
+ errors = check_identifiers(identifiers, tables, columns)
+ assert test_id in errors
+ assert any("Missing column" in e for e in errors[test_id])
+
+
+def test_check_table_only_identifier_passes():
+ """Identifier with column=None should only check table existence."""
+ test_id = uuid4()
+ identifiers = {("public", "orders", None): {test_id}}
+ tables = {("public", "orders")}
+ columns = set()
+ errors = check_identifiers(identifiers, tables, columns)
+ assert len(errors) == 0
+
+
+def test_check_multiple_tests_share_identifier():
+ id1, id2 = uuid4(), uuid4()
+ identifiers = {("public", "missing_table", None): {id1, id2}}
+ tables = set()
+ columns = set()
+ errors = check_identifiers(identifiers, tables, columns)
+ assert id1 in errors
+ assert id2 in errors
+
+
+def test_check_error_format_starts_with_deactivated():
+ test_id = uuid4()
+ identifiers = {("public", "orders", "bad_col"): {test_id}}
+ tables = {("public", "orders")}
+ columns = set()
+ errors = check_identifiers(identifiers, tables, columns)
+ assert errors[test_id][0] == "Deactivated"
diff --git a/tests/unit/commands/test_score_cards.py b/tests/unit/commands/test_score_cards.py
new file mode 100644
index 00000000..a537eee3
--- /dev/null
+++ b/tests/unit/commands/test_score_cards.py
@@ -0,0 +1,82 @@
+from uuid import uuid4
+
+import pytest
+
+from testgen.commands.run_refresh_score_cards_results import _score_card_to_results
+
+pytestmark = pytest.mark.unit
+
+
+def _make_score_card(**overrides):
+ defaults = {
+ "id": str(uuid4()),
+ "project_code": "test_project",
+ "name": "Test Score Card",
+ "score": 85.5,
+ "cde_score": 90.0,
+ "profiling_score": 80.0,
+ "testing_score": 88.0,
+ "categories": [],
+ "history": [],
+ "definition": None,
+ }
+ defaults.update(overrides)
+ return defaults
+
+
+def test_basic_result_count():
+ """Should produce 4 base results (score, cde_score, profiling_score, testing_score)."""
+ card = _make_score_card()
+ results = _score_card_to_results(card)
+ assert len(results) == 4
+
+
+def test_result_categories():
+ card = _make_score_card()
+ results = _score_card_to_results(card)
+ categories = [r.category for r in results]
+ assert categories == ["score", "cde_score", "profiling_score", "testing_score"]
+
+
+def test_result_scores_match_card():
+ card = _make_score_card(score=85.5, cde_score=90.0, profiling_score=80.0, testing_score=88.0)
+ results = _score_card_to_results(card)
+ assert results[0].score == 85.5
+ assert results[1].score == 90.0
+ assert results[2].score == 80.0
+ assert results[3].score == 88.0
+
+
+def test_definition_id_set():
+ card_id = str(uuid4())
+ card = _make_score_card(id=card_id)
+ results = _score_card_to_results(card)
+ for result in results:
+ assert str(result.definition_id) == card_id
+
+
+def test_with_categories():
+ """Categories from score card should be appended as extra results."""
+ card = _make_score_card(categories=[
+ {"label": "completeness", "score": 95.0},
+ {"label": "accuracy", "score": 72.0},
+ ])
+ results = _score_card_to_results(card)
+ assert len(results) == 6 # 4 base + 2 categories
+ assert results[4].category == "completeness"
+ assert results[4].score == 95.0
+ assert results[5].category == "accuracy"
+ assert results[5].score == 72.0
+
+
+def test_empty_categories():
+ card = _make_score_card(categories=[])
+ results = _score_card_to_results(card)
+ assert len(results) == 4
+
+
+def test_none_score_values():
+ card = _make_score_card(score=None, cde_score=None, profiling_score=None, testing_score=None)
+ results = _score_card_to_results(card)
+ for result in results:
+ assert result.score is None
diff --git a/tests/unit/commands/test_thresholds_prediction.py b/tests/unit/commands/test_thresholds_prediction.py
new file mode 100644
index 00000000..f9df4592
--- /dev/null
+++ b/tests/unit/commands/test_thresholds_prediction.py
@@ -0,0 +1,198 @@
+import json
+from unittest.mock import patch
+
+import pandas as pd
+import pytest
+from scipy import stats
+
+from testgen.commands.test_thresholds_prediction import (
+ T_DISTRIBUTION_THRESHOLD,
+ Z_SCORE_MAP,
+ compute_sarimax_threshold,
+)
+from testgen.common.models.test_suite import PredictSensitivity
+from testgen.common.time_series_service import NotEnoughData
+
+pytestmark = pytest.mark.unit
+
+
+def _make_history(n: int, value: float = 100.0) -> pd.DataFrame:
+ """Build a minimal history DataFrame with n data points."""
+ dates = pd.date_range("2025-01-01", periods=n, freq="D")
+ return pd.DataFrame({"result_signal": [value] * n}, index=dates)
+
+
+def _make_forecast(mean_values: list[float], se_values: list[float]) -> pd.DataFrame:
+ """Build a minimal forecast DataFrame with 'mean' and 'se' columns."""
+ dates = pd.date_range("2025-06-01", periods=len(mean_values), freq="D")
+ return pd.DataFrame({"mean": mean_values, "se": se_values}, index=dates)
+
+
+MOCK_TARGET = "testgen.commands.test_thresholds_prediction.get_sarimax_forecast"
+
+
+# --- min_lookback guard ---
+
+
+def test_below_min_lookback_returns_none():
+ history = _make_history(3)
+ lower, upper, prediction = compute_sarimax_threshold(history, PredictSensitivity.medium, min_lookback=5)
+ assert lower is None
+ assert upper is None
+ assert prediction is None
+
+
+# --- Normal tolerance calculation (large sample, z-scores used directly) ---
+
+
+@patch(MOCK_TARGET)
+def test_medium_sensitivity_large_sample(mock_forecast):
+ forecast = _make_forecast([100.0, 105.0], [10.0, 12.0])
+ mock_forecast.return_value = forecast
+ history = _make_history(T_DISTRIBUTION_THRESHOLD)
+
+ lower, upper, forecast_json = compute_sarimax_threshold(history, PredictSensitivity.medium)
+
+ # medium: lower z=-2.5, upper z=2.5, large sample uses z directly
+ assert lower == pytest.approx(100.0 + (-2.5 * 10.0))
+ assert upper == pytest.approx(100.0 + (2.5 * 10.0))
+ assert forecast_json is not None
+ parsed = json.loads(forecast_json)
+ assert "mean" in parsed
+
+
+@patch(MOCK_TARGET)
+def test_high_sensitivity_large_sample(mock_forecast):
+ forecast = _make_forecast([100.0], [10.0])
+ mock_forecast.return_value = forecast
+ history = _make_history(T_DISTRIBUTION_THRESHOLD)
+
+ lower, upper, _ = compute_sarimax_threshold(history, PredictSensitivity.high)
+
+ # high: lower z=-2.0, upper z=2.0
+ assert lower == pytest.approx(80.0)
+ assert upper == pytest.approx(120.0)
+
+
+@patch(MOCK_TARGET)
+def test_low_sensitivity_large_sample(mock_forecast):
+ forecast = _make_forecast([100.0], [10.0])
+ mock_forecast.return_value = forecast
+ history = _make_history(T_DISTRIBUTION_THRESHOLD)
+
+ lower, upper, _ = compute_sarimax_threshold(history, PredictSensitivity.low)
+
+ # low: lower z=-3.0, upper z=3.0
+ assert lower == pytest.approx(70.0)
+ assert upper == pytest.approx(130.0)
+
+
+# --- t-distribution adjustment for small samples ---
+
+
+@patch(MOCK_TARGET)
+def test_small_sample_uses_t_distribution(mock_forecast):
+ """With fewer than T_DISTRIBUTION_THRESHOLD points, z-scores should be
+ widened via t-distribution to account for estimation uncertainty."""
+ forecast = _make_forecast([100.0], [10.0])
+ mock_forecast.return_value = forecast
+ n = 10
+ history = _make_history(n)
+
+ lower, upper, _ = compute_sarimax_threshold(history, PredictSensitivity.medium)
+
+ # t-distribution multiplier for medium sensitivity (z=-2.5 / z=2.5)
+ lower_percentile = stats.norm.cdf(-2.5)
+ upper_percentile = stats.norm.cdf(2.5)
+ lower_mult = stats.t.ppf(lower_percentile, df=n - 1)
+ upper_mult = stats.t.ppf(upper_percentile, df=n - 1)
+
+ assert lower == pytest.approx(100.0 + (lower_mult * 10.0))
+ assert upper == pytest.approx(100.0 + (upper_mult * 10.0))
+
+ # t-distribution should produce wider bounds than raw z-scores
+ assert lower < 100.0 + (-2.5 * 10.0)
+ assert upper > 100.0 + (2.5 * 10.0)
+
+
+# --- NaN handling ---
+
+
+@patch(MOCK_TARGET)
+def test_nan_mean_returns_none(mock_forecast):
+ forecast = _make_forecast([float("nan")], [10.0])
+ mock_forecast.return_value = forecast
+ history = _make_history(T_DISTRIBUTION_THRESHOLD)
+
+ lower, upper, forecast_json = compute_sarimax_threshold(history, PredictSensitivity.medium)
+
+ assert lower is None
+ assert upper is None
+ assert forecast_json is None
+
+
+@patch(MOCK_TARGET)
+def test_nan_se_returns_none(mock_forecast):
+ forecast = _make_forecast([100.0], [float("nan")])
+ mock_forecast.return_value = forecast
+ history = _make_history(T_DISTRIBUTION_THRESHOLD)
+
+ lower, upper, forecast_json = compute_sarimax_threshold(history, PredictSensitivity.medium)
+
+ assert lower is None
+ assert upper is None
+ assert forecast_json is None
+
+
+# --- NotEnoughData from SARIMAX ---
+
+
+@patch(MOCK_TARGET, side_effect=NotEnoughData("not enough"))
+def test_not_enough_data_returns_none(mock_forecast):
+ history = _make_history(T_DISTRIBUTION_THRESHOLD)
+
+ lower, upper, forecast_json = compute_sarimax_threshold(history, PredictSensitivity.medium)
+
+ assert lower is None
+ assert upper is None
+ assert forecast_json is None
+
+
+# --- Uses first forecast date ---
+
+
+@patch(MOCK_TARGET)
+def test_uses_first_forecast_date(mock_forecast):
+ """Tolerances should be computed from the first row of the forecast."""
+ forecast = _make_forecast([100.0, 200.0], [10.0, 50.0])
+ mock_forecast.return_value = forecast
+ history = _make_history(T_DISTRIBUTION_THRESHOLD)
+
+ lower, upper, _ = compute_sarimax_threshold(history, PredictSensitivity.medium)
+
+ # Should use first row (mean=100, se=10), not second (mean=200, se=50)
+ assert lower == pytest.approx(100.0 + (-2.5 * 10.0))
+ assert upper == pytest.approx(100.0 + (2.5 * 10.0))
+
+
+# --- Z_SCORE_MAP completeness ---
+
+
+def test_z_score_map_covers_all_sensitivities():
+ """Every sensitivity level should have both lower and upper entries."""
+ for sensitivity in PredictSensitivity:
+ assert ("lower_tolerance", sensitivity) in Z_SCORE_MAP
+ assert ("upper_tolerance", sensitivity) in Z_SCORE_MAP
+
+
+@patch(MOCK_TARGET)
+def test_all_z_score_columns_added_to_forecast(mock_forecast):
+ forecast = _make_forecast([100.0], [10.0])
+ mock_forecast.return_value = forecast
+ history = _make_history(T_DISTRIBUTION_THRESHOLD)
+
+ compute_sarimax_threshold(history, PredictSensitivity.medium)
+
+ for key in Z_SCORE_MAP:
+ col = f"{key[0]}|{key[1].value}"
+ assert col in forecast.columns
diff --git a/tests/unit/common/notifications/test_profiling_run_notifications.py b/tests/unit/common/notifications/test_profiling_run_notifications.py
index c9e7ca38..5320b997 100644
--- a/tests/unit/common/notifications/test_profiling_run_notifications.py
+++ b/tests/unit/common/notifications/test_profiling_run_notifications.py
@@ -100,7 +100,7 @@ def test_send_profiling_run_notification(
hi_count_mock,
send_mock,
):
- profiling_run = ProfilingRun(id="pr-id", table_groups_id="tg-id", status=profiling_run_status)
+ profiling_run = ProfilingRun(id="pr-id", table_groups_id="tg-id", status=profiling_run_status, project_code="proj")
get_prev_mock.return_value = ProfilingRun(id="pr-prev-id") if has_prev_run else None
new_count = iter(count())
priorities = ("Definite", "Likely", "Possible", "High", "Moderate")
@@ -133,8 +133,8 @@ def test_send_profiling_run_notification(
{
"profiling_run": {
"id": "pr-id",
- "issues_url": "http://tg-base-url/profiling-runs:hygiene?run_id=pr-id&source=email",
- "results_url": "http://tg-base-url/profiling-runs:results?run_id=pr-id&source=email",
+ "issues_url": "http://tg-base-url/profiling-runs:hygiene?project_code=proj&run_id=pr-id&source=email",
+ "results_url": "http://tg-base-url/profiling-runs:results?project_code=proj&run_id=pr-id&source=email",
"start_time": None,
"end_time": None,
"status": profiling_run_status,
diff --git a/tests/unit/common/notifications/test_score_drop_notifications.py b/tests/unit/common/notifications/test_score_drop_notifications.py
index 26267578..76617976 100644
--- a/tests/unit/common/notifications/test_score_drop_notifications.py
+++ b/tests/unit/common/notifications/test_score_drop_notifications.py
@@ -171,7 +171,7 @@ def test_send_score_drop_notifications(
{
"project_name": "Test Project",
"definition": score_definition,
- "scorecard_url": "http://tg-base-url/quality-dashboard:score-details?definition_id=sd-1&source=email",
+ "scorecard_url": "http://tg-base-url/quality-dashboard:score-details?project_code=test-proj&definition_id=sd-1&source=email",
"diff": [
{**expected_total_diff, "notify": total_triggers},
{**expected_cde_diff, "notify": cde_triggers},
diff --git a/tests/unit/common/notifications/test_test_run_notifications.py b/tests/unit/common/notifications/test_test_run_notifications.py
index bde2d6ff..06cd75f9 100644
--- a/tests/unit/common/notifications/test_test_run_notifications.py
+++ b/tests/unit/common/notifications/test_test_run_notifications.py
@@ -151,7 +151,7 @@ def test_send_test_run_notification(
else:
diff_mock.return_value = create_diff(**diff_mock_args)
get_prev_mock.return_value = TestRun(id="tr-prev-id")
- summary = object()
+ summary = Mock(project_code="test_project")
select_summary_mock.return_value = [summary]
send_test_run_notifications(test_run)
@@ -174,7 +174,7 @@ def test_send_test_run_notification(
expected_context = {
"test_run": summary,
- "test_run_url": "http://tg-base-url/test-runs:results?run_id=tr-id&source=email",
+ "test_run_url": "http://tg-base-url/test-runs:results?project_code=test_project&run_id=tr-id&source=email",
"test_run_id": "tr-id",
"test_result_summary": ANY,
}
diff --git a/tests/unit/common/test_auth.py b/tests/unit/common/test_auth.py
new file mode 100644
index 00000000..87ccc5bb
--- /dev/null
+++ b/tests/unit/common/test_auth.py
@@ -0,0 +1,98 @@
+import base64
+from datetime import UTC, datetime, timedelta
+from unittest.mock import ANY, MagicMock, patch
+
+import bcrypt
+import jwt
+import pytest
+
+from testgen.common.auth import (
+ check_permission,
+ create_jwt_token,
+ decode_jwt_token,
+ verify_password,
+)
+
+JWT_KEY = base64.b64encode(b"test-secret-key-for-jwt-signing!").decode("ascii")
+TEST_PASSWORD = "testpass" # noqa: S105
+
+
+def _make_token(username="testuser", exp_days=30):
+ key = base64.b64decode(JWT_KEY.encode("ascii"))
+ payload = {
+ "username": username,
+ "exp_date": (datetime.now(UTC) + timedelta(days=exp_days)).timestamp(),
+ }
+ return jwt.encode(payload, key, algorithm="HS256")
+
+
+@patch("testgen.common.auth.settings")
+def test_create_jwt_token_creates_valid_token(mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+ token = create_jwt_token("testuser", expiry_days=7)
+
+ key = base64.b64decode(JWT_KEY.encode("ascii"))
+ payload = jwt.decode(token, key, algorithms=["HS256"])
+ assert payload["username"] == "testuser"
+ assert payload["exp_date"] > datetime.now(UTC).timestamp()
+
+
+@patch("testgen.common.auth.settings")
+def test_decode_jwt_token_decodes_valid_token(mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+ token = _make_token()
+ payload = decode_jwt_token(token)
+ assert payload["username"] == "testuser"
+
+
+@patch("testgen.common.auth.settings")
+def test_decode_jwt_token_raises_for_expired_token(mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+ token = _make_token(exp_days=-1)
+ with pytest.raises(ValueError, match="Token has expired"):
+ decode_jwt_token(token)
+
+
+@patch("testgen.common.auth.settings")
+def test_decode_jwt_token_raises_for_invalid_token(mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+ with pytest.raises(ValueError, match="Invalid token"):
+ decode_jwt_token("not-a-valid-token")
+
+
+def test_verify_password_correct():
+ hashed = bcrypt.hashpw(TEST_PASSWORD.encode(), bcrypt.gensalt()).decode()
+ assert verify_password(TEST_PASSWORD, hashed) is True
+
+
+def test_verify_password_wrong():
+ hashed = bcrypt.hashpw(TEST_PASSWORD.encode(), bcrypt.gensalt()).decode()
+ assert verify_password("wrongpass", hashed) is False
+
+
+def test_check_permission_allowed_with_plugin():
+ mock_rbac = MagicMock()
+ mock_rbac.check_permission.return_value = True
+ mock_hook = MagicMock()
+ mock_hook.rbac = mock_rbac
+ with patch("testgen.utils.plugins.PluginHook.instance", return_value=mock_hook):
+ assert check_permission(MagicMock(role="admin"), "edit") is True
+ mock_rbac.check_permission.assert_called_once_with(ANY, "edit")
+
+
+def test_check_permission_denied_with_plugin():
+ mock_rbac = MagicMock()
+ mock_rbac.check_permission.return_value = False
+ mock_hook = MagicMock()
+ mock_hook.rbac = mock_rbac
+ with patch("testgen.utils.plugins.PluginHook.instance", return_value=mock_hook):
+ assert check_permission(MagicMock(role="business"), "administer") is False
+
+
+def test_check_permission_defaults_without_plugin():
+ from testgen.utils.plugins import PluginHook
+
+ with patch("testgen.utils.plugins.PluginHook.instance") as mock_instance:
+ hook = PluginHook()
+ mock_instance.return_value = hook
+ assert check_permission(MagicMock(role="business"), "administer") is True
diff --git a/tests/unit/common/test_freshness_service.py b/tests/unit/common/test_freshness_service.py
index 6021e8e2..f8317413 100644
--- a/tests/unit/common/test_freshness_service.py
+++ b/tests/unit/common/test_freshness_service.py
@@ -904,6 +904,113 @@ def test_with_excluded_days(self):
assert 1700 <= result <= 1800
+class Test_WeekendScheduleInteraction:
+ """Test that schedule-inferred active_days supersedes exclude_weekends.
+
+ Scenario: Tables update Mon-Fri evenings, monitor runs at midnight & noon.
+ Schedule inference detects active_days = Tue-Sat (detection days, shifted
+ by one from update days). excluded_days = {Mon, Sun}.
+
+ Once the schedule is active, excluded_days should be the single source
+ of truth — Saturday (a detection day) should NOT be treated as excluded.
+ """
+
+ def _make_midnight_detection_schedule(self):
+ """Schedule for tables that update on weekday evenings and are detected
+ the following morning at midnight: Tue-Sat active, midnight window."""
+ return _make_schedule(
+ active_days=frozenset({1, 2, 3, 4, 5}), # Tue-Sat
+ window_start=0.0,
+ window_end=0.0,
+ )
+
+ def test_saturday_check_update_detected_passes(self):
+ """Friday update detected at Saturday midnight check should pass.
+
+ With the fix: exclude_weekends=False when excluded_days is present,
+ so Saturday is NOT excluded. Deadline lands on Saturday (next active day
+ after Friday), giving upper ~1620 min — well above the ~1440 min gap.
+ """
+ schedule = self._make_midnight_detection_schedule()
+ excluded_days = frozenset({0, 6}) # Mon, Sun
+ zi = zoneinfo.ZoneInfo(TZ)
+ # Friday midnight ET = 05:00 UTC (EST)
+ last_update = pd.Timestamp("2026-02-06T05:00", tz="UTC").tz_localize(None)
+
+ upper = minutes_to_next_deadline(
+ last_update, schedule,
+ exclude_weekends=False, # The fix: schedule supersedes this
+ holiday_dates=None, tz=TZ, buffer_hours=3.0,
+ excluded_days=excluded_days,
+ )
+ assert upper is not None
+ # ~1620 min (Fri midnight to Sat 3AM, no excluded time on Saturday)
+ assert upper > 1500
+
+ # The actual gap (Fri midnight to Sat midnight) is ~1440 min
+ # which should be well within the tolerance
+ assert 1440 < upper
+
+ def test_saturday_check_is_not_excluded_day(self):
+ """Saturday should not be IS_EXCLUDED_DAY when schedule says it's active."""
+ zi = zoneinfo.ZoneInfo(TZ)
+ excluded_days = frozenset({0, 6}) # Mon, Sun
+ # Saturday midnight ET = 05:00 UTC (EST, Feb before DST)
+ sat_run = pd.Timestamp("2026-02-07T05:00", tz="UTC").tz_localize(None)
+
+ result = is_excluded_day(
+ sat_run,
+ exclude_weekends=False, # The fix
+ holiday_dates=None,
+ tz=TZ,
+ excluded_days=excluded_days,
+ )
+ # Saturday (weekday 5) is NOT in excluded_days {0, 6}
+ assert result is False
+
+ def test_sunday_still_excluded(self):
+ """Sunday should remain excluded (in excluded_days={0, 6})."""
+ excluded_days = frozenset({0, 6})
+ sun_run = pd.Timestamp("2026-02-08T05:00", tz="UTC").tz_localize(None)
+
+ result = is_excluded_day(
+ sun_run,
+ exclude_weekends=False,
+ holiday_dates=None,
+ tz=TZ,
+ excluded_days=excluded_days,
+ )
+ assert result is True
+
+ def test_monday_still_excluded(self):
+ """Monday should remain excluded (in excluded_days={0, 6}, weekday 0)."""
+ excluded_days = frozenset({0, 6})
+ mon_run = pd.Timestamp("2026-02-09T05:00", tz="UTC").tz_localize(None)
+
+ result = is_excluded_day(
+ mon_run,
+ exclude_weekends=False,
+ holiday_dates=None,
+ tz=TZ,
+ excluded_days=excluded_days,
+ )
+ assert result is True
+
+ def test_saturday_excluded_minutes_zero_for_weekday_gap(self):
+ """No excluded minutes between Friday and Saturday when Saturday is active."""
+ excluded_days = frozenset({0, 6})
+ fri = pd.Timestamp("2026-02-06T05:00") # Fri midnight ET
+ sat = pd.Timestamp("2026-02-07T05:00") # Sat midnight ET
+
+ excl = count_excluded_minutes(
+ fri, sat,
+ exclude_weekends=False,
+ holiday_dates=None,
+ tz=TZ,
+ excluded_days=excluded_days,
+ )
+ assert excl == 0
+
# ---------------------------------------------------------------------------
# is_excluded_day with window_start/window_end Tests
diff --git a/tests/unit/common/test_pii_masking.py b/tests/unit/common/test_pii_masking.py
new file mode 100644
index 00000000..b336ad43
--- /dev/null
+++ b/tests/unit/common/test_pii_masking.py
@@ -0,0 +1,300 @@
+import pandas as pd
+
+from testgen.common.pii_masking import PII_REDACTED, mask_hygiene_detail, mask_profiling_pii, mask_source_data_pii
+
+
+class Test_mask_source_data_pii:
+ def test_masks_pii_columns(self):
+ df = pd.DataFrame({
+ "name": ["Alice", "Bob"],
+ "ssn": ["123-45-6789", "987-65-4321"],
+ "age": [30, 25],
+ })
+ mask_source_data_pii(df, {"ssn"})
+ assert df["ssn"].tolist() == [PII_REDACTED, PII_REDACTED]
+ assert df["age"].tolist() == [30, 25]
+ assert df["name"].tolist() == ["Alice", "Bob"]
+
+ def test_preserves_non_pii_columns(self):
+ df = pd.DataFrame({"col_a": [1, 2], "col_b": ["x", "y"]})
+ mask_source_data_pii(df, {"col_a"})
+ assert df["col_b"].tolist() == ["x", "y"]
+
+ def test_handles_empty_dataframe(self):
+ df = pd.DataFrame(columns=["name", "ssn"])
+ mask_source_data_pii(df, {"ssn"})
+ assert df.empty
+
+ def test_handles_missing_pii_column(self):
+ df = pd.DataFrame({"col_a": [1, 2]})
+ mask_source_data_pii(df, {"nonexistent_col"})
+ assert df["col_a"].tolist() == [1, 2]
+
+ def test_handles_empty_pii_set(self):
+ df = pd.DataFrame({"col_a": [1, 2]})
+ mask_source_data_pii(df, set())
+ assert df["col_a"].tolist() == [1, 2]
+
+ def test_case_insensitive_matching(self):
+ df = pd.DataFrame({"SSN": ["123-45-6789"], "Name": ["Alice"]})
+ mask_source_data_pii(df, {"ssn"})
+ assert df["SSN"].tolist() == [PII_REDACTED]
+ assert df["Name"].tolist() == ["Alice"]
+
+ def test_multiple_pii_columns(self):
+ df = pd.DataFrame({
+ "name": ["Alice"],
+ "ssn": ["123"],
+ "email": ["a@b.com"],
+ "age": [30],
+ })
+ mask_source_data_pii(df, {"ssn", "email"})
+ assert df["ssn"].tolist() == [PII_REDACTED]
+ assert df["email"].tolist() == [PII_REDACTED]
+ assert df["name"].tolist() == ["Alice"]
+ assert df["age"].tolist() == [30]
+
+
+class Test_mask_profiling_pii:
+ def _make_profiling_df(self):
+ return pd.DataFrame({
+ "column_name": ["ssn", "age", "email"],
+ "top_freq_values": ["123|456", "30|25", "a@b|c@d"],
+ "min_text": ["000", "20", "a@a"],
+ "max_text": ["999", "40", "z@z"],
+ "min_value": [0, 20, None],
+ "max_value": [999, 40, None],
+ })
+
+ def test_masks_pii_profiling_fields(self):
+ df = self._make_profiling_df()
+ mask_profiling_pii(df, {"ssn", "email"})
+
+ ssn_row = df[df["column_name"] == "ssn"].iloc[0]
+ assert ssn_row["top_freq_values"] == PII_REDACTED
+ assert ssn_row["min_text"] == PII_REDACTED
+ assert ssn_row["max_text"] == PII_REDACTED
+ assert ssn_row["min_value"] == PII_REDACTED
+ assert ssn_row["max_value"] == PII_REDACTED
+
+ email_row = df[df["column_name"] == "email"].iloc[0]
+ assert email_row["top_freq_values"] == PII_REDACTED
+
+ def test_preserves_non_pii_rows(self):
+ df = self._make_profiling_df()
+ mask_profiling_pii(df, {"ssn"})
+
+ age_row = df[df["column_name"] == "age"].iloc[0]
+ assert age_row["top_freq_values"] == "30|25"
+ assert age_row["min_text"] == "20"
+ assert age_row["max_text"] == "40"
+
+ def test_handles_empty_dataframe(self):
+ df = pd.DataFrame(columns=["column_name", "top_freq_values"])
+ mask_profiling_pii(df, {"ssn"})
+ assert df.empty
+
+ def test_handles_empty_pii_set(self):
+ df = self._make_profiling_df()
+ original_values = df["top_freq_values"].tolist()
+ mask_profiling_pii(df, set())
+ assert df["top_freq_values"].tolist() == original_values
+
+ def test_handles_missing_fields(self):
+ df = pd.DataFrame({
+ "column_name": ["ssn", "age"],
+ "top_freq_values": ["123", "30"],
+ })
+ mask_profiling_pii(df, {"ssn"})
+ assert df.loc[0, "top_freq_values"] == PII_REDACTED
+ assert df.loc[1, "top_freq_values"] == "30"
+
+ def test_case_insensitive_column_name_matching(self):
+ df = pd.DataFrame({
+ "column_name": ["SSN", "age"],
+ "top_freq_values": ["123", "30"],
+ "min_text": ["000", "20"],
+ })
+ mask_profiling_pii(df, {"ssn"})
+ assert df.loc[0, "top_freq_values"] == PII_REDACTED
+ assert df.loc[0, "min_text"] == PII_REDACTED
+ assert df.loc[1, "top_freq_values"] == "30"
+
+
+class Test_mask_profiling_pii_dict:
+ def test_masks_fields_when_column_is_pii(self):
+ data = {
+ "column_name": "ssn",
+ "top_freq_values": "123|456",
+ "min_text": "000",
+ "max_text": "999",
+ "min_value": 0,
+ "max_value": 999,
+ "min_value_over_0": 1,
+ "min_date": "2024-01-01",
+ "max_date": "2024-12-31",
+ }
+ mask_profiling_pii(data, {"ssn"})
+ assert data["top_freq_values"] == PII_REDACTED
+ assert data["min_text"] == PII_REDACTED
+ assert data["max_text"] == PII_REDACTED
+ assert data["min_value"] == PII_REDACTED
+ assert data["max_value"] == PII_REDACTED
+ assert data["min_value_over_0"] == PII_REDACTED
+ assert data["min_date"] == PII_REDACTED
+ assert data["max_date"] == PII_REDACTED
+
+ def test_preserves_non_pii_column(self):
+ data = {
+ "column_name": "age",
+ "top_freq_values": "30|25",
+ "min_text": "20",
+ "max_text": "40",
+ }
+ mask_profiling_pii(data, {"ssn"})
+ assert data["top_freq_values"] == "30|25"
+ assert data["min_text"] == "20"
+ assert data["max_text"] == "40"
+
+ def test_case_insensitive_matching(self):
+ data = {"column_name": "SSN", "min_text": "000"}
+ mask_profiling_pii(data, {"ssn"})
+ assert data["min_text"] == PII_REDACTED
+
+ def test_empty_pii_set_skips_masking(self):
+ data = {"column_name": "ssn", "min_text": "000"}
+ mask_profiling_pii(data, set())
+ assert data["min_text"] == "000"
+
+ def test_missing_fields_handled(self):
+ data = {"column_name": "ssn", "min_text": "000"}
+ mask_profiling_pii(data, {"ssn"})
+ assert data["min_text"] == PII_REDACTED
+ assert "top_freq_values" not in data
+
+ def test_no_column_name_masks_unconditionally(self):
+ data = {"top_freq_values": "123|456", "min_text": "000"}
+ mask_profiling_pii(data, {"ssn"})
+ assert data["top_freq_values"] == PII_REDACTED
+ assert data["min_text"] == PII_REDACTED
+
+ def test_preserves_non_profiling_fields(self):
+ data = {
+ "column_name": "ssn",
+ "top_freq_values": "123",
+ "record_ct": 100,
+ "distinct_value_ct": 50,
+ }
+ mask_profiling_pii(data, {"ssn"})
+ assert data["top_freq_values"] == PII_REDACTED
+ assert data["record_ct"] == 100
+ assert data["distinct_value_ct"] == 50
+
+
+class Test_mask_hygiene_detail_dataframe:
+ def test_masks_detail_for_pii_redactable_rows(self):
+ df = pd.DataFrame({
+ "column_name": ["ssn", "age", "email"],
+ "detail": ["SSN range: 100-999", "Count: 50", "Email range: a@b - z@y"],
+ "detail_redactable": [True, False, True],
+ "pii_flag": ["A/ID/SSN", None, "B/CONTACT/Email"],
+ })
+ mask_hygiene_detail(df)
+ assert df.loc[0, "detail"] == PII_REDACTED
+ assert df.loc[1, "detail"] == "Count: 50"
+ assert df.loc[2, "detail"] == PII_REDACTED
+
+ def test_preserves_non_redactable_pii_rows(self):
+ df = pd.DataFrame({
+ "column_name": ["ssn"],
+ "detail": ["Non-printing chars: 5"],
+ "detail_redactable": [False],
+ "pii_flag": ["A/ID/SSN"],
+ })
+ mask_hygiene_detail(df)
+ assert df.loc[0, "detail"] == "Non-printing chars: 5"
+
+ def test_preserves_redactable_non_pii_rows(self):
+ df = pd.DataFrame({
+ "column_name": ["age"],
+ "detail": ["Date range: 2020-2024"],
+ "detail_redactable": [True],
+ "pii_flag": [None],
+ })
+ mask_hygiene_detail(df)
+ assert df.loc[0, "detail"] == "Date range: 2020-2024"
+
+ def test_handles_empty_dataframe(self):
+ df = pd.DataFrame(columns=["column_name", "detail", "detail_redactable", "pii_flag"])
+ mask_hygiene_detail(df)
+ assert df.empty
+
+ def test_handles_missing_detail_redactable_column(self):
+ df = pd.DataFrame({
+ "column_name": ["ssn"],
+ "detail": ["some detail"],
+ "pii_flag": ["A/ID/SSN"],
+ })
+ mask_hygiene_detail(df)
+ assert df.loc[0, "detail"] == "some detail"
+
+ def test_handles_null_detail_redactable(self):
+ df = pd.DataFrame({
+ "column_name": ["ssn"],
+ "detail": ["SSN range: 100-999"],
+ "detail_redactable": [None],
+ "pii_flag": ["A/ID/SSN"],
+ })
+ mask_hygiene_detail(df)
+ assert df.loc[0, "detail"] == "SSN range: 100-999"
+
+
+class Test_mask_hygiene_detail_list_with_pii_flag:
+ def test_masks_detail_when_redactable_and_pii(self):
+ issues = [
+ {"detail": "Date range: 2020-2024", "detail_redactable": True, "pii_flag": "A/ID/SSN"},
+ {"detail": "Count: 50", "detail_redactable": False, "pii_flag": "A/ID/SSN"},
+ {"detail": "Min text: Alice", "detail_redactable": True, "pii_flag": None},
+ ]
+ mask_hygiene_detail(issues)
+ assert issues[0]["detail"] == PII_REDACTED
+ assert issues[1]["detail"] == "Count: 50"
+ assert issues[2]["detail"] == "Min text: Alice"
+
+ def test_handles_empty_list(self):
+ issues = []
+ mask_hygiene_detail(issues)
+ assert issues == []
+
+ def test_handles_missing_fields(self):
+ issues = [{"detail": "some detail"}]
+ mask_hygiene_detail(issues)
+ assert issues[0]["detail"] == "some detail"
+
+
+class Test_mask_hygiene_detail_list_with_pii_columns:
+ def test_masks_detail_when_column_is_pii(self):
+ issues = [
+ {"column_name": "ssn", "detail": "Date range: 2020-2024", "detail_redactable": True},
+ {"column_name": "age", "detail": "Count: 50", "detail_redactable": True},
+ {"column_name": "email", "detail": "Min text: a@b", "detail_redactable": True},
+ ]
+ mask_hygiene_detail(issues, pii_columns={"ssn", "email"})
+ assert issues[0]["detail"] == PII_REDACTED
+ assert issues[1]["detail"] == "Count: 50"
+ assert issues[2]["detail"] == PII_REDACTED
+
+ def test_case_insensitive_column_matching(self):
+ issues = [{"column_name": "SSN", "detail": "range: 100-999", "detail_redactable": True}]
+ mask_hygiene_detail(issues, pii_columns={"ssn"})
+ assert issues[0]["detail"] == PII_REDACTED
+
+ def test_empty_pii_columns_skips_masking(self):
+ issues = [{"column_name": "ssn", "detail": "range: 100-999", "detail_redactable": True}]
+ mask_hygiene_detail(issues, pii_columns=set())
+ assert issues[0]["detail"] == "range: 100-999"
+
+ def test_non_redactable_issues_preserved(self):
+ issues = [{"column_name": "ssn", "detail": "Non-printing: 5", "detail_redactable": False}]
+ mask_hygiene_detail(issues, pii_columns={"ssn"})
+ assert issues[0]["detail"] == "Non-printing: 5"
diff --git a/tests/unit/mcp/__init__.py b/tests/unit/mcp/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/unit/mcp/conftest.py b/tests/unit/mcp/conftest.py
new file mode 100644
index 00000000..dd706d27
--- /dev/null
+++ b/tests/unit/mcp/conftest.py
@@ -0,0 +1,47 @@
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+import pytest
+
+from testgen.mcp.permissions import set_mcp_username
+
+# Fictional role matrix for tests. role_a and role_b have full access; role_c has only 'catalog'.
+TEST_PERM_MATRIX = {
+ "view": ["role_a", "role_b"],
+ "catalog": ["role_a", "role_b", "role_c"],
+}
+
+
+def _test_roles_with_permission(permission):
+ return TEST_PERM_MATRIX.get(permission, [])
+
+
+@pytest.fixture(autouse=True)
+def mcp_user():
+ """Set up an authenticated MCP user for all tool tests.
+
+ Default: user has 'role_a' on 'demo' project (full access).
+ The @mcp_permission decorator passes for any permission.
+
+ Tests needing scoped access patch _compute_project_permissions directly.
+ """
+ set_mcp_username("test_user")
+ user = MagicMock()
+ user.id = uuid4()
+
+ membership = MagicMock()
+ membership.project_code = "demo"
+ membership.role = "role_a"
+
+ with (
+ patch("testgen.mcp.permissions.User") as mock_user_cls,
+ patch("testgen.mcp.permissions.ProjectMembership") as mock_membership,
+ patch("testgen.mcp.permissions.PluginHook") as mock_hook,
+ ):
+ mock_user_cls.get.return_value = user
+ mock_membership.get_memberships_for_user.return_value = [membership]
+ mock_hook.instance.return_value.rbac.get_roles_with_permission.side_effect = (
+ _test_roles_with_permission
+ )
+ yield user
+ set_mcp_username(None)
diff --git a/tests/unit/mcp/test_auth.py b/tests/unit/mcp/test_auth.py
new file mode 100644
index 00000000..ab0d4973
--- /dev/null
+++ b/tests/unit/mcp/test_auth.py
@@ -0,0 +1,138 @@
+import asyncio
+import base64
+from datetime import UTC, datetime, timedelta
+from unittest.mock import MagicMock, patch
+
+import bcrypt
+import jwt
+import pytest
+
+from testgen.mcp.auth import authenticate_user, validate_token
+from testgen.mcp.server import JWTTokenVerifier
+
+JWT_KEY = base64.b64encode(b"test-secret-key-for-jwt-signing!").decode("ascii")
+TEST_PASSWORD = "testpass" # noqa: S105
+
+
+def _make_user(username="testuser", role="admin"):
+ hashed = bcrypt.hashpw(TEST_PASSWORD.encode(), bcrypt.gensalt()).decode()
+ user = MagicMock()
+ user.username = username
+ user.password = hashed
+ user.role = role
+ return user
+
+
+def _make_token(username="testuser", exp_days=30):
+ key = base64.b64decode(JWT_KEY.encode("ascii"))
+ payload = {
+ "username": username,
+ "exp_date": (datetime.now(UTC) + timedelta(days=exp_days)).timestamp(),
+ }
+ return jwt.encode(payload, key, algorithm="HS256")
+
+
+@patch("testgen.common.auth.settings")
+@patch("testgen.mcp.auth.User")
+def test_authenticate_user_returns_jwt(mock_user_cls, mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+ mock_user_cls.get.return_value = _make_user()
+
+ token = authenticate_user("testuser", TEST_PASSWORD)
+
+ key = base64.b64decode(JWT_KEY.encode("ascii"))
+ payload = jwt.decode(token, key, algorithms=["HS256"])
+ assert payload["username"] == "testuser"
+ assert payload["exp_date"] > datetime.now(UTC).timestamp()
+
+
+@patch("testgen.common.auth.settings")
+@patch("testgen.mcp.auth.User")
+def test_authenticate_user_raises_for_wrong_password(mock_user_cls, mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+ mock_user_cls.get.return_value = _make_user()
+
+ with pytest.raises(ValueError, match="Invalid username or password"):
+ authenticate_user("testuser", "wrongpass")
+
+
+@patch("testgen.common.auth.settings")
+@patch("testgen.mcp.auth.User")
+def test_authenticate_user_raises_for_unknown_user(mock_user_cls, mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+ mock_user_cls.get.return_value = None
+
+ with pytest.raises(ValueError, match="Invalid username or password"):
+ authenticate_user("nobody", TEST_PASSWORD)
+
+
+@patch("testgen.common.auth.settings")
+@patch("testgen.mcp.auth.User")
+def test_validate_token_returns_user(mock_user_cls, mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+ expected_user = _make_user()
+ mock_user_cls.get.return_value = expected_user
+
+ user = validate_token(_make_token())
+
+ assert user is expected_user
+ mock_user_cls.get.assert_called_once_with("testuser")
+
+
+@patch("testgen.common.auth.settings")
+def test_validate_token_raises_for_expired_token(mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+
+ with pytest.raises(ValueError, match="Token has expired"):
+ validate_token(_make_token(exp_days=-1))
+
+
+@patch("testgen.common.auth.settings")
+def test_validate_token_raises_for_invalid_token(mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+
+ with pytest.raises(ValueError, match="Invalid token"):
+ validate_token("not-a-valid-token")
+
+
+@patch("testgen.common.auth.settings")
+@patch("testgen.mcp.auth.User")
+def test_validate_token_raises_for_missing_user(mock_user_cls, mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+ mock_user_cls.get.return_value = None
+
+ with pytest.raises(ValueError, match="User not found"):
+ validate_token(_make_token())
+
+
+@patch("testgen.common.auth.settings")
+def test_token_verifier_returns_access_token_for_valid_jwt(mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+ verifier = JWTTokenVerifier()
+ token = _make_token()
+
+ result = asyncio.run(verifier.verify_token(token))
+
+ assert result is not None
+ assert result.client_id == "testuser"
+ assert result.token == token
+
+
+@patch("testgen.common.auth.settings")
+def test_token_verifier_returns_none_for_expired_jwt(mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+ verifier = JWTTokenVerifier()
+
+ result = asyncio.run(verifier.verify_token(_make_token(exp_days=-1)))
+
+ assert result is None
+
+
+@patch("testgen.common.auth.settings")
+def test_token_verifier_returns_none_for_invalid_jwt(mock_settings):
+ mock_settings.JWT_HASHING_KEY_B64 = JWT_KEY
+ verifier = JWTTokenVerifier()
+
+ result = asyncio.run(verifier.verify_token("garbage"))
+
+ assert result is None
diff --git a/tests/unit/mcp/test_error_boundary.py b/tests/unit/mcp/test_error_boundary.py
new file mode 100644
index 00000000..a49ca36a
--- /dev/null
+++ b/tests/unit/mcp/test_error_boundary.py
@@ -0,0 +1,61 @@
+"""Tests for the mcp_error_handler decorator."""
+
+import logging
+
+from testgen.mcp.exceptions import MCPPermissionDenied, MCPUserError, mcp_error_handler
+
+
+def test_returns_normal_result():
+ @mcp_error_handler
+ def my_tool(x: int) -> str:
+ return f"result: {x}"
+
+ assert my_tool(42) == "result: 42"
+
+
+def test_converts_mcp_user_error_to_string():
+ @mcp_error_handler
+ def failing_tool():
+ raise MCPUserError("Invalid table_group_id: `abc` is not a valid UUID.")
+
+ assert failing_tool() == "Invalid table_group_id: `abc` is not a valid UUID."
+
+
+def test_converts_permission_denied_to_string():
+ @mcp_error_handler
+ def restricted_tool():
+ raise MCPPermissionDenied("Your role does not include the necessary permission.")
+
+ assert restricted_tool() == "Your role does not include the necessary permission."
+
+
+def test_catches_unexpected_error_and_returns_neutral_message():
+ @mcp_error_handler
+ def broken_tool():
+ raise RuntimeError("DB connection pool exhausted")
+
+ result = broken_tool()
+ assert result == "An unexpected error occurred."
+ assert "DB connection pool" not in result
+
+
+def test_logs_unexpected_error_traceback(caplog):
+ @mcp_error_handler
+ def broken_tool():
+ raise RuntimeError("secret internal detail")
+
+ with caplog.at_level(logging.ERROR, logger="testgen"):
+ broken_tool()
+
+ assert "secret internal detail" in caplog.text
+ assert "broken_tool" in caplog.text
+
+
+def test_preserves_function_metadata():
+ @mcp_error_handler
+ def my_tool(x: int, y: str = "default") -> str:
+ """Tool docstring."""
+ return f"{x}-{y}"
+
+ assert my_tool.__name__ == "my_tool"
+ assert my_tool.__doc__ == "Tool docstring."
diff --git a/tests/unit/mcp/test_inventory_service.py b/tests/unit/mcp/test_inventory_service.py
new file mode 100644
index 00000000..715ef476
--- /dev/null
+++ b/tests/unit/mcp/test_inventory_service.py
@@ -0,0 +1,140 @@
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+import pytest
+
+
+@pytest.fixture
+def session_mock():
+ with patch("testgen.mcp.services.inventory_service.get_current_session") as mock:
+ yield mock.return_value
+
+
+def _make_row(project_code="demo", project_name="Demo", connection_id=1, connection_name="main",
+ table_group_id=None, table_groups_name="core",
+ table_group_schema="public", test_suite_id=None, test_suite="Quality"):
+ row = MagicMock()
+ row.project_code = project_code
+ row.project_name = project_name
+ row.connection_id = connection_id
+ row.connection_name = connection_name
+ row.table_group_id = table_group_id or uuid4()
+ row.table_groups_name = table_groups_name
+ row.table_group_schema = table_group_schema
+ row.test_suite_id = test_suite_id or uuid4()
+ row.test_suite = test_suite
+ return row
+
+
+@patch("testgen.mcp.services.inventory_service.select")
+def test_get_inventory_basic(mock_select, session_mock):
+ tg_id = uuid4()
+ row = _make_row(table_group_id=tg_id)
+ session_mock.execute.return_value.all.return_value = [row]
+
+ from testgen.mcp.services.inventory_service import get_inventory
+
+ result = get_inventory(project_codes=["demo"], view_project_codes=["demo"])
+
+ assert "Data Inventory" in result
+ assert "Demo" in result
+ assert "main" in result
+ assert "core" in result
+ assert "Quality" in result
+
+
+@patch("testgen.mcp.services.inventory_service.select")
+def test_get_inventory_empty(mock_select, session_mock):
+ session_mock.execute.return_value.all.return_value = []
+
+ from testgen.mcp.services.inventory_service import get_inventory
+
+ result = get_inventory(project_codes=["demo"], view_project_codes=["demo"])
+
+ assert "Data Inventory" in result
+
+
+@patch("testgen.mcp.services.inventory_service.select")
+def test_get_inventory_project_no_connections(mock_select, session_mock):
+ row = _make_row(connection_id=None)
+ session_mock.execute.return_value.all.return_value = [row]
+
+ from testgen.mcp.services.inventory_service import get_inventory
+
+ result = get_inventory(project_codes=["demo"], view_project_codes=["demo"])
+
+ assert "Demo" in result
+ assert "No connections" in result
+
+
+@patch("testgen.mcp.services.inventory_service.select")
+def test_get_inventory_includes_list_tables_hint(mock_select, session_mock):
+ session_mock.execute.return_value.all.return_value = [_make_row()]
+
+ from testgen.mcp.services.inventory_service import get_inventory
+
+ result = get_inventory(project_codes=["demo"], view_project_codes=["demo"])
+
+ assert "list_tables" in result
+
+
+@patch("testgen.mcp.services.inventory_service.select")
+def test_get_inventory_compact_groups(mock_select, session_mock):
+ """When >50 groups, group output uses single-line compact format."""
+ rows = [
+ _make_row(
+ table_group_id=uuid4(),
+ table_groups_name=f"Group_{i}",
+ test_suite=f"Suite_{i}",
+ test_suite_id=uuid4(),
+ )
+ for i in range(55)
+ ]
+ session_mock.execute.return_value.all.return_value = rows
+
+ from testgen.mcp.services.inventory_service import get_inventory
+
+ result = get_inventory(project_codes=["demo"], view_project_codes=["demo"])
+
+ # Compact groups: single line with "test suites: N", no "#### Table Group:" headers
+ assert "test suites:" in result
+ assert "#### Table Group:" not in result
+
+
+@patch("testgen.mcp.services.inventory_service.select")
+def test_get_inventory_without_view_hides_connections_and_suites(mock_select, session_mock):
+ """Without view permission: connection names hidden, table groups shown in compact format, suites hidden."""
+ tg_id = uuid4()
+ suite_id = uuid4()
+ row = _make_row(table_group_id=tg_id, test_suite_id=suite_id, test_suite="Secret Suite")
+ session_mock.execute.return_value.all.return_value = [row]
+
+ from testgen.mcp.services.inventory_service import get_inventory
+
+ result = get_inventory(project_codes=["demo"], view_project_codes=[])
+
+ assert "Demo" in result
+ assert "main" not in result # connection name hidden
+ assert "core" in result # table group still shown
+ assert str(tg_id) in result # table group id still shown
+ assert "Secret Suite" not in result # suite name hidden
+ assert str(suite_id) not in result # suite id hidden
+ assert "test suites: 1" in result # suite count shown
+
+
+@patch("testgen.mcp.services.inventory_service.select")
+def test_get_inventory_with_view_shows_all_details(mock_select, session_mock):
+ """With view permission: connections, table groups, and suites all shown."""
+ tg_id = uuid4()
+ suite_id = uuid4()
+ row = _make_row(table_group_id=tg_id, test_suite_id=suite_id, test_suite="Visible Suite")
+ session_mock.execute.return_value.all.return_value = [row]
+
+ from testgen.mcp.services.inventory_service import get_inventory
+
+ result = get_inventory(project_codes=["demo"], view_project_codes=["demo"])
+
+ assert "main" in result # connection name shown
+ assert "Visible Suite" in result
+ assert str(suite_id) in result
+ assert "requires `view` permission" not in result
diff --git a/tests/unit/mcp/test_model_data_table.py b/tests/unit/mcp/test_model_data_table.py
new file mode 100644
index 00000000..0f9f10e2
--- /dev/null
+++ b/tests/unit/mcp/test_model_data_table.py
@@ -0,0 +1,41 @@
+from unittest.mock import patch
+from uuid import uuid4
+
+from testgen.common.models.data_table import DataTable
+
+
+@patch("testgen.common.models.data_table.get_current_session")
+def test_select_table_names_returns_list(session_mock):
+ session_mock.return_value.scalars.return_value.all.return_value = ["customers", "orders", "products"]
+
+ result = DataTable.select_table_names(table_groups_id=uuid4())
+
+ assert result == ["customers", "orders", "products"]
+ session_mock.return_value.scalars.assert_called_once()
+
+
+@patch("testgen.common.models.data_table.get_current_session")
+def test_select_table_names_empty(session_mock):
+ session_mock.return_value.scalars.return_value.all.return_value = []
+
+ result = DataTable.select_table_names(table_groups_id=uuid4())
+
+ assert result == []
+
+
+@patch("testgen.common.models.data_table.get_current_session")
+def test_count_tables(session_mock):
+ session_mock.return_value.scalar.return_value = 42
+
+ result = DataTable.count_tables(table_groups_id=uuid4())
+
+ assert result == 42
+
+
+@patch("testgen.common.models.data_table.get_current_session")
+def test_count_tables_none_returns_zero(session_mock):
+ session_mock.return_value.scalar.return_value = None
+
+ result = DataTable.count_tables(table_groups_id=uuid4())
+
+ assert result == 0
diff --git a/tests/unit/mcp/test_model_test_result.py b/tests/unit/mcp/test_model_test_result.py
new file mode 100644
index 00000000..f04949b4
--- /dev/null
+++ b/tests/unit/mcp/test_model_test_result.py
@@ -0,0 +1,103 @@
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+import pytest
+
+from testgen.common.models.test_result import TestResult, TestResultStatus
+
+
+@pytest.fixture
+def session_mock():
+ with patch("testgen.common.models.test_result.get_current_session") as mock:
+ yield mock.return_value
+
+
+def test_select_results_basic(session_mock):
+ mock_results = [MagicMock(spec=TestResult)]
+ session_mock.scalars.return_value.all.return_value = mock_results
+
+ results = TestResult.select_results(test_run_id=uuid4())
+
+ assert results == mock_results
+ session_mock.scalars.assert_called_once()
+
+
+def test_select_results_with_status_filter(session_mock):
+ session_mock.scalars.return_value.all.return_value = []
+
+ results = TestResult.select_results(test_run_id=uuid4(), status=TestResultStatus.Failed)
+
+ assert results == []
+
+
+def test_select_results_with_all_filters(session_mock):
+ session_mock.scalars.return_value.all.return_value = []
+
+ results = TestResult.select_results(
+ test_run_id=uuid4(),
+ status=TestResultStatus.Passed,
+ table_name="orders",
+ test_type="Alpha_Trunc",
+ limit=10,
+ )
+
+ assert results == []
+
+
+def test_select_failures_by_test_type(session_mock):
+ session_mock.execute.return_value.all.return_value = [
+ ("Alpha_Trunc", TestResultStatus.Failed, 5),
+ ("Unique_Pct", TestResultStatus.Warning, 3),
+ ]
+
+ results = TestResult.select_failures(test_run_id=uuid4(), group_by="test_type")
+
+ assert len(results) == 2
+ assert results[0] == ("Alpha_Trunc", TestResultStatus.Failed, 5)
+
+
+def test_select_failures_by_table_name(session_mock):
+ session_mock.execute.return_value.all.return_value = [("orders", 8)]
+
+ results = TestResult.select_failures(test_run_id=uuid4(), group_by="table_name")
+
+ assert results[0] == ("orders", 8)
+
+
+def test_select_failures_by_column_names(session_mock):
+ session_mock.execute.return_value.all.return_value = [("orders", "customer_name", 4)]
+
+ results = TestResult.select_failures(test_run_id=uuid4(), group_by="column_names")
+
+ assert results[0] == ("orders", "customer_name", 4)
+
+
+def test_select_failures_invalid_group_by():
+ with pytest.raises(ValueError, match="group_by must be one of"):
+ TestResult.select_failures(test_run_id=uuid4(), group_by="invalid_column")
+
+
+def test_select_failures_empty(session_mock):
+ session_mock.execute.return_value.all.return_value = []
+
+ results = TestResult.select_failures(test_run_id=uuid4())
+
+ assert results == []
+
+
+def test_select_history_basic(session_mock):
+ mock_results = [MagicMock(spec=TestResult), MagicMock(spec=TestResult)]
+ session_mock.scalars.return_value.all.return_value = mock_results
+
+ results = TestResult.select_history(test_definition_id=uuid4())
+
+ assert results == mock_results
+ session_mock.scalars.assert_called_once()
+
+
+def test_select_history_empty(session_mock):
+ session_mock.scalars.return_value.all.return_value = []
+
+ results = TestResult.select_history(test_definition_id=uuid4(), limit=10)
+
+ assert results == []
diff --git a/tests/unit/mcp/test_permissions.py b/tests/unit/mcp/test_permissions.py
new file mode 100644
index 00000000..6f7b1512
--- /dev/null
+++ b/tests/unit/mcp/test_permissions.py
@@ -0,0 +1,261 @@
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+import pytest
+
+from testgen.mcp.exceptions import MCPPermissionDenied
+from testgen.mcp.permissions import (
+ _NOT_SET,
+ ProjectPermissions,
+ _compute_project_permissions,
+ _mcp_project_permissions,
+ get_current_mcp_user,
+ get_project_permissions,
+ mcp_permission,
+ set_mcp_username,
+)
+
+
+@pytest.fixture(autouse=True)
+def _reset_contextvars():
+ set_mcp_username(None)
+ tok = _mcp_project_permissions.set(_NOT_SET)
+ yield
+ set_mcp_username(None)
+ _mcp_project_permissions.reset(tok)
+
+
+# --- get_current_mcp_user ---
+
+
+def test_get_current_mcp_user_raises_when_no_username():
+ with pytest.raises(RuntimeError, match="No authenticated user"):
+ get_current_mcp_user()
+
+
+@patch("testgen.mcp.permissions.User")
+def test_get_current_mcp_user_raises_when_user_not_found(mock_user):
+ mock_user.get.return_value = None
+ set_mcp_username("ghost")
+
+ with pytest.raises(ValueError, match="Authenticated user not found: ghost"):
+ get_current_mcp_user()
+
+
+@patch("testgen.mcp.permissions.User")
+def test_get_current_mcp_user_returns_user(mock_user):
+ user = MagicMock()
+ mock_user.get.return_value = user
+ set_mcp_username("admin")
+
+ result = get_current_mcp_user()
+
+ assert result is user
+ mock_user.get.assert_called_once_with("admin")
+
+
+# --- _compute_project_permissions ---
+
+
+@patch("testgen.mcp.permissions.ProjectMembership")
+def test_compute_project_permissions_returns_memberships(mock_membership):
+ user = MagicMock()
+ user.id = uuid4()
+
+ m1 = MagicMock()
+ m1.project_code = "proj_a"
+ m1.role = "role_a"
+ m2 = MagicMock()
+ m2.project_code = "proj_b"
+ m2.role = "role_c"
+ mock_membership.get_memberships_for_user.return_value = [m1, m2]
+
+ result = _compute_project_permissions(user, "view")
+
+ assert result.memberships == {"proj_a": "role_a", "proj_b": "role_c"}
+ assert result.permission == "view"
+ mock_membership.get_memberships_for_user.assert_called_once_with(user.id)
+
+
+@patch("testgen.mcp.permissions.ProjectMembership")
+def test_compute_project_permissions_no_memberships(mock_membership):
+ user = MagicMock()
+ user.id = uuid4()
+ mock_membership.get_memberships_for_user.return_value = []
+
+ result = _compute_project_permissions(user, "view")
+
+ assert result.memberships == {}
+ assert result.permission == "view"
+
+
+# --- ProjectPermissions.codes_allowed_to ---
+# These rely on the conftest's PluginHook mock (TEST_PERM_MATRIX).
+
+
+def test_codes_allowed_to_filters_by_role():
+ perms = ProjectPermissions(
+ memberships={"proj_a": "role_a", "proj_b": "role_c"},
+ permission="catalog",
+ )
+ # "view" includes role_a but not role_c
+ result = perms.codes_allowed_to("view")
+ assert result == ["proj_a"]
+
+
+def test_codes_allowed_to_all_matching():
+ perms = ProjectPermissions(
+ memberships={"proj_a": "role_a", "proj_b": "role_b"},
+ permission="catalog",
+ )
+ # "catalog" includes all roles
+ result = perms.codes_allowed_to("catalog")
+ assert sorted(result) == ["proj_a", "proj_b"]
+
+
+def test_codes_allowed_to_none_matching():
+ perms = ProjectPermissions(
+ memberships={"proj_a": "role_c"},
+ permission="catalog",
+ )
+ # "view" excludes role_c
+ result = perms.codes_allowed_to("view")
+ assert result == []
+
+
+# --- ProjectPermissions.allowed_codes ---
+
+
+def test_allowed_codes_uses_decorator_permission():
+ perms = ProjectPermissions(
+ memberships={"proj_a": "role_a", "proj_b": "role_c"},
+ permission="view",
+ )
+ # "view" includes role_a but not role_c
+ assert perms.allowed_codes == ["proj_a"]
+
+
+# --- ProjectPermissions.verify_access ---
+
+
+def test_verify_access_allowed_passes():
+ perms = ProjectPermissions(memberships={"proj_a": "role_a"}, permission="view")
+ perms.verify_access("proj_a", not_found="not found")
+
+
+def test_verify_access_membership_but_wrong_role_raises():
+ perms = ProjectPermissions(
+ memberships={"proj_a": "role_a", "proj_b": "role_c"},
+ permission="view",
+ )
+ with pytest.raises(MCPPermissionDenied, match="necessary permission"):
+ perms.verify_access("proj_b", not_found="not found")
+
+
+def test_verify_access_no_membership_raises_not_found():
+ perms = ProjectPermissions(
+ memberships={"proj_a": "role_a"},
+ permission="view",
+ )
+ with pytest.raises(MCPPermissionDenied, match="not found"):
+ perms.verify_access("secret", not_found="not found")
+
+
+# --- ProjectPermissions.has_access ---
+
+
+def test_has_access():
+ perms = ProjectPermissions(memberships={"proj_a": "role_a"}, permission="view")
+ assert perms.has_access("proj_a") is True
+ assert perms.has_access("proj_b") is False
+
+
+# --- get_project_permissions ---
+
+
+def test_get_project_permissions_raises_without_decorator():
+ with pytest.raises(RuntimeError, match="add the decorator"):
+ get_project_permissions()
+
+
+def test_get_project_permissions_returns_set_value():
+ perms = ProjectPermissions(memberships={}, permission="view")
+ token = _mcp_project_permissions.set(perms)
+ try:
+ assert get_project_permissions() is perms
+ finally:
+ _mcp_project_permissions.reset(token)
+
+
+# --- mcp_permission decorator ---
+# These rely on conftest's mocks (User, ProjectMembership, PluginHook).
+
+
+def test_mcp_permission_sets_contextvar():
+ set_mcp_username("test")
+
+ captured = {}
+
+ @mcp_permission("view")
+ def tool_fn():
+ perms = get_project_permissions()
+ captured["perms"] = perms
+ return "ok"
+
+ result = tool_fn()
+
+ assert result == "ok"
+ assert "demo" in captured["perms"].allowed_codes
+ assert captured["perms"].memberships == {"demo": "role_a"}
+
+
+@patch("testgen.mcp.permissions.ProjectMembership")
+def test_mcp_permission_raises_when_no_allowed_codes(mock_membership):
+ """Decorator raises MCPPermissionDenied if user has no projects with the required permission."""
+ set_mcp_username("test")
+
+ m1 = MagicMock()
+ m1.project_code = "proj_a"
+ m1.role = "role_c"
+ mock_membership.get_memberships_for_user.return_value = [m1]
+
+ @mcp_permission("view")
+ def tool_fn():
+ raise AssertionError("Should not be called")
+
+ with pytest.raises(MCPPermissionDenied, match="permission"):
+ tool_fn()
+
+
+def test_mcp_permission_propagates_mcp_permission_denied():
+ """Decorator lets MCPPermissionDenied propagate — safe_tool handles conversion."""
+ set_mcp_username("test")
+
+ @mcp_permission("view")
+ def tool_fn():
+ raise MCPPermissionDenied("Access denied for testing")
+
+ with pytest.raises(MCPPermissionDenied, match="Access denied for testing"):
+ tool_fn()
+
+
+def test_mcp_permission_resets_contextvar_after_call():
+ set_mcp_username("test")
+
+ @mcp_permission("view")
+ def tool_fn():
+ return "ok"
+
+ tool_fn()
+
+ assert _mcp_project_permissions.get() is _NOT_SET
+
+
+def test_mcp_permission_preserves_function_metadata():
+ @mcp_permission("view")
+ def my_tool(x: int, y: str = "default") -> str:
+ """Tool docstring."""
+ return f"{x}-{y}"
+
+ assert my_tool.__name__ == "my_tool"
+ assert my_tool.__doc__ == "Tool docstring."
diff --git a/tests/unit/mcp/test_tools_discovery.py b/tests/unit/mcp/test_tools_discovery.py
new file mode 100644
index 00000000..a04379d9
--- /dev/null
+++ b/tests/unit/mcp/test_tools_discovery.py
@@ -0,0 +1,213 @@
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+import pytest
+
+from testgen.mcp.exceptions import MCPPermissionDenied
+from testgen.mcp.permissions import ProjectPermissions
+
+
+@patch("testgen.mcp.services.inventory_service.get_inventory")
+def test_get_data_inventory_returns_markdown(mock_get_inventory, db_session_mock):
+ mock_get_inventory.return_value = "# Data Inventory\n\n## Project: Demo"
+
+ from testgen.mcp.tools.discovery import get_data_inventory
+
+ result = get_data_inventory()
+
+ assert "Data Inventory" in result
+ mock_get_inventory.assert_called_once()
+
+
+@patch("testgen.mcp.services.inventory_service.get_inventory")
+@patch("testgen.mcp.permissions._compute_project_permissions")
+def test_get_data_inventory_passes_project_codes_for_scoped_user(
+ mock_compute, mock_get_inventory, db_session_mock,
+):
+ mock_compute.return_value = ProjectPermissions(
+ memberships={"proj_a": "role_c"},
+ permission="catalog",
+ )
+ mock_get_inventory.return_value = "# Data Inventory"
+
+ from testgen.mcp.tools.discovery import get_data_inventory
+
+ get_data_inventory()
+
+ call_kwargs = mock_get_inventory.call_args.kwargs
+ assert call_kwargs["project_codes"] == ["proj_a"]
+
+
+@patch("testgen.mcp.services.inventory_service.get_inventory")
+@patch("testgen.mcp.permissions._compute_project_permissions")
+def test_get_data_inventory_view_codes_for_scoped_user(
+ mock_compute, mock_get_inventory, db_session_mock,
+):
+ mock_compute.return_value = ProjectPermissions(
+ memberships={"proj_a": "role_c", "proj_b": "role_a"},
+ permission="catalog",
+ )
+ mock_get_inventory.return_value = "# Data Inventory"
+
+ from testgen.mcp.tools.discovery import get_data_inventory
+
+ get_data_inventory()
+
+ call_kwargs = mock_get_inventory.call_args.kwargs
+ # "view" includes role_a but not role_c
+ assert call_kwargs["view_project_codes"] == ["proj_b"]
+
+
+@patch("testgen.mcp.tools.discovery.Project")
+def test_list_projects_returns_formatted(mock_project, db_session_mock):
+ proj1 = MagicMock()
+ proj1.project_name = "Demo Project"
+ proj1.project_code = "demo"
+ proj2 = MagicMock()
+ proj2.project_name = "Staging"
+ proj2.project_code = "staging"
+ mock_project.select_where.return_value = [proj1, proj2]
+
+ from testgen.mcp.tools.discovery import list_projects
+
+ result = list_projects()
+
+ assert "Demo Project" in result
+ assert "`demo`" in result
+ # "staging" is not in conftest's default memberships, so filtered out
+ assert "Staging" not in result
+
+
+@patch("testgen.mcp.tools.discovery.Project")
+def test_list_projects_empty(mock_project, db_session_mock):
+ mock_project.select_where.return_value = []
+
+ from testgen.mcp.tools.discovery import list_projects
+
+ result = list_projects()
+
+ assert "No projects found" in result
+
+
+@patch("testgen.mcp.tools.discovery.Project")
+@patch("testgen.mcp.permissions._compute_project_permissions")
+def test_list_projects_filters_for_scoped_user(mock_compute, mock_project, db_session_mock):
+ mock_compute.return_value = ProjectPermissions(
+ memberships={"demo": "role_a"},
+ permission="catalog",
+ )
+
+ proj1 = MagicMock()
+ proj1.project_name = "Demo Project"
+ proj1.project_code = "demo"
+ proj2 = MagicMock()
+ proj2.project_name = "Secret"
+ proj2.project_code = "secret"
+ mock_project.select_where.return_value = [proj1, proj2]
+
+ from testgen.mcp.tools.discovery import list_projects
+
+ result = list_projects()
+
+ assert "Demo Project" in result
+ assert "Secret" not in result
+
+
+@patch("testgen.mcp.tools.discovery.TestSuite")
+def test_list_test_suites_returns_stats(mock_suite, db_session_mock):
+ summary = MagicMock()
+ summary.id = uuid4()
+ summary.test_suite = "Quality Suite"
+ summary.connection_name = "main_conn"
+ summary.table_groups_name = "core_tables"
+ summary.test_suite_description = "Main quality checks"
+ summary.test_ct = 50
+ summary.latest_run_id = uuid4()
+ summary.latest_run_start = "2024-01-15T10:00:00"
+ summary.last_run_test_ct = 50
+ summary.last_run_passed_ct = 45
+ summary.last_run_failed_ct = 3
+ summary.last_run_warning_ct = 2
+ summary.last_run_error_ct = 0
+ summary.last_run_dismissed_ct = 0
+ mock_suite.select_summary.return_value = [summary]
+
+ from testgen.mcp.tools.discovery import list_test_suites
+
+ result = list_test_suites("demo")
+
+ assert "Quality Suite" in result
+ assert "45 passed" in result
+ assert "3 failed" in result
+
+
+@patch("testgen.mcp.tools.discovery.TestSuite")
+def test_list_test_suites_empty(mock_suite, db_session_mock):
+ mock_suite.select_summary.return_value = []
+
+ from testgen.mcp.tools.discovery import list_test_suites
+
+ result = list_test_suites("demo")
+
+ assert "No test suites found" in result
+
+
+def test_list_test_suites_empty_project_code(db_session_mock):
+ from testgen.mcp.tools.discovery import list_test_suites
+
+ result = list_test_suites("")
+
+ assert "Missing required parameter" in result
+ assert "project_code" in result
+
+
+@patch("testgen.mcp.permissions._compute_project_permissions")
+def test_list_test_suites_raises_not_found_for_inaccessible_project(
+ mock_compute, db_session_mock,
+):
+ mock_compute.return_value = ProjectPermissions(
+ memberships={"other_project": "role_a"},
+ permission="view",
+ )
+
+ from testgen.mcp.tools.discovery import list_test_suites
+
+ with pytest.raises(MCPPermissionDenied, match="No test suites found for project `secret_project`"):
+ list_test_suites("secret_project")
+
+
+@patch("testgen.mcp.permissions._compute_project_permissions")
+def test_list_test_suites_raises_denial_for_insufficient_permission(
+ mock_compute, db_session_mock,
+):
+ mock_compute.return_value = ProjectPermissions(
+ memberships={"other_project": "role_a", "secret_project": "role_c"},
+ permission="view",
+ )
+
+ from testgen.mcp.tools.discovery import list_test_suites
+
+ with pytest.raises(MCPPermissionDenied, match="necessary permission"):
+ list_test_suites("secret_project")
+
+
+@patch("testgen.mcp.tools.discovery.DataTable")
+@patch("testgen.mcp.permissions._compute_project_permissions")
+def test_list_tables_returns_not_found_for_inaccessible_group(
+ mock_compute, mock_dt, db_session_mock,
+):
+ mock_compute.return_value = ProjectPermissions(
+ memberships={"proj_a": "role_a"},
+ permission="catalog",
+ )
+ mock_dt.select_table_names.return_value = []
+ mock_dt.count_tables.return_value = 0
+
+ from testgen.mcp.tools.discovery import list_tables
+
+ result = list_tables(str(uuid4()))
+
+ assert "No tables found" in result
+ mock_dt.select_table_names.assert_called_once()
+ call_kwargs = mock_dt.select_table_names.call_args
+ assert call_kwargs.kwargs["project_codes"] == ["proj_a"]
diff --git a/tests/unit/mcp/test_tools_reference.py b/tests/unit/mcp/test_tools_reference.py
new file mode 100644
index 00000000..bbfcdead
--- /dev/null
+++ b/tests/unit/mcp/test_tools_reference.py
@@ -0,0 +1,91 @@
+from unittest.mock import MagicMock, patch
+
+
+@patch("testgen.mcp.tools.reference.TestType")
+def test_get_test_type_found(mock_tt_cls, db_session_mock):
+ tt = MagicMock()
+ tt.test_type = "Alpha_Trunc"
+ tt.test_name_short = "Alpha Truncation"
+ tt.test_name_long = "Alphabetic Truncation Test"
+ tt.test_description = "Checks for truncated alphabetic values"
+ tt.measure_uom = "Pct"
+ tt.measure_uom_description = "Percentage of truncated values"
+ tt.threshold_description = "Maximum allowed truncation rate"
+ tt.dq_dimension = "Accuracy"
+ tt.test_scope = "column"
+ tt.except_message = "Alpha truncation detected"
+ tt.usage_notes = "Best for VARCHAR columns"
+ mock_tt_cls.select_where.return_value = [tt]
+
+ from testgen.mcp.tools.reference import get_test_type
+
+ result = get_test_type("Alpha Truncation")
+
+ assert "Alpha Truncation" in result
+ assert "Alpha_Trunc" not in result
+ assert "Accuracy" in result
+ assert "column" in result
+ assert "truncated" in result.lower()
+
+
+@patch("testgen.mcp.tools.reference.TestType")
+def test_get_test_type_not_found(mock_tt_cls, db_session_mock):
+ mock_tt_cls.select_where.return_value = []
+
+ from testgen.mcp.tools.reference import get_test_type
+
+ result = get_test_type("Nonexistent Type")
+
+ assert "not found" in result
+
+
+@patch("testgen.mcp.tools.reference.TestType")
+def test_test_types_resource(mock_tt_cls, db_session_mock):
+ tt1 = MagicMock()
+ tt1.test_type = "Alpha_Trunc"
+ tt1.test_name_short = "Alpha Truncation"
+ tt1.dq_dimension = "Accuracy"
+ tt1.test_scope = "column"
+ tt1.test_description = "Checks truncation"
+ tt2 = MagicMock()
+ tt2.test_type = "Unique_Pct"
+ tt2.test_name_short = "Unique Percent"
+ tt2.dq_dimension = "Uniqueness"
+ tt2.test_scope = "column"
+ tt2.test_description = "Checks unique percentage"
+ mock_tt_cls.select_where.return_value = [tt1, tt2]
+
+ from testgen.mcp.tools.reference import test_types_resource
+
+ result = test_types_resource()
+
+ assert "Alpha Truncation" in result
+ assert "Unique Percent" in result
+ assert "Alpha_Trunc" not in result
+ assert "Unique_Pct" not in result
+ assert "Accuracy" in result
+ assert "Uniqueness" in result
+
+
+@patch("testgen.mcp.tools.reference.TestType")
+def test_test_types_resource_empty(mock_tt_cls, db_session_mock):
+ mock_tt_cls.select_where.return_value = []
+
+ from testgen.mcp.tools.reference import test_types_resource
+
+ result = test_types_resource()
+
+ assert "No test types found" in result
+
+
+def test_glossary_resource():
+ from testgen.mcp.tools.reference import glossary_resource
+
+ result = glossary_resource()
+
+ assert "Entity Hierarchy" in result
+ assert "Result Statuses" in result
+ assert "Data Quality Dimensions" in result
+ assert "Test Scopes" in result
+ assert "Disposition" in result
+ assert "Monitor Types" not in result
diff --git a/tests/unit/mcp/test_tools_test_results.py b/tests/unit/mcp/test_tools_test_results.py
new file mode 100644
index 00000000..1fd2812e
--- /dev/null
+++ b/tests/unit/mcp/test_tools_test_results.py
@@ -0,0 +1,297 @@
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+import pytest
+
+from testgen.common.models.test_result import TestResultStatus
+from testgen.mcp.exceptions import MCPUserError
+from testgen.mcp.permissions import ProjectPermissions
+
+
+@patch("testgen.mcp.tools.test_results.TestType")
+@patch("testgen.mcp.tools.test_results.TestResult")
+def test_get_test_results_basic(mock_result, mock_tt_cls, db_session_mock):
+ run_id = str(uuid4())
+ r1 = MagicMock()
+ r1.status = TestResultStatus.Failed
+ r1.test_type = "Alpha_Trunc"
+ r1.test_definition_id = uuid4()
+ r1.table_name = "orders"
+ r1.column_names = "customer_name"
+ r1.result_measure = "15.3"
+ r1.threshold_value = "10.0"
+ r1.message = "Truncation detected"
+ mock_result.select_results.return_value = [r1]
+
+ tt = MagicMock()
+ tt.test_type = "Alpha_Trunc"
+ tt.test_name_short = "Alpha Truncation"
+ mock_tt_cls.select_where.return_value = [tt]
+
+ from testgen.mcp.tools.test_results import get_test_results
+
+ result = get_test_results(run_id)
+
+ assert "Alpha Truncation" in result
+ assert "Alpha_Trunc" not in result
+ assert "on `customer_name` in `orders`" in result
+ assert "15.3" in result
+ assert "Truncation detected" in result
+
+
+@patch("testgen.mcp.tools.test_results.TestType")
+@patch("testgen.mcp.tools.test_results.TestResult")
+def test_get_test_results_table_level_title(mock_result, mock_tt_cls, db_session_mock):
+ run_id = str(uuid4())
+ r1 = MagicMock()
+ r1.status = TestResultStatus.Passed
+ r1.test_type = "Row_Ct"
+ r1.test_definition_id = uuid4()
+ r1.table_name = "orders"
+ r1.column_names = None
+ r1.result_measure = "1000"
+ r1.threshold_value = "500"
+ r1.message = None
+ mock_result.select_results.return_value = [r1]
+
+ tt = MagicMock()
+ tt.test_type = "Row_Ct"
+ tt.test_name_short = "Row Count"
+ mock_tt_cls.select_where.return_value = [tt]
+
+ from testgen.mcp.tools.test_results import get_test_results
+
+ result = get_test_results(run_id)
+
+ assert "Row Count on `orders`" in result
+ assert "` in `" not in result
+
+
+@patch("testgen.mcp.tools.test_results.TestResult")
+def test_get_test_results_empty(mock_result, db_session_mock):
+ mock_result.select_results.return_value = []
+
+ from testgen.mcp.tools.test_results import get_test_results
+
+ result = get_test_results(str(uuid4()))
+
+ assert "No test results found" in result
+
+
+@patch("testgen.mcp.tools.test_results.TestType")
+@patch("testgen.mcp.tools.test_results.TestResult")
+def test_get_test_results_with_filters(mock_result, mock_tt_cls, db_session_mock):
+ tt = MagicMock()
+ tt.test_type = "Alpha_Trunc"
+ tt.test_name_short = "Alpha Truncation"
+ mock_tt_cls.select_where.return_value = [tt]
+ mock_result.select_results.return_value = []
+
+ from testgen.mcp.tools.test_results import get_test_results
+
+ result = get_test_results(str(uuid4()), status="Failed", table_name="orders", test_type="Alpha Truncation")
+
+ assert "status=Failed" in result
+ assert "table=orders" in result
+ assert "type=Alpha Truncation" in result
+
+
+def test_get_test_results_invalid_uuid(db_session_mock):
+ from testgen.mcp.tools.test_results import get_test_results
+
+ with pytest.raises(MCPUserError, match="not a valid UUID"):
+ get_test_results("not-a-uuid")
+
+
+def test_get_test_results_invalid_status(db_session_mock):
+ from testgen.mcp.tools.test_results import get_test_results
+
+ with pytest.raises(MCPUserError, match="Invalid status"):
+ get_test_results(str(uuid4()), status="BadStatus")
+
+
+@patch("testgen.mcp.tools.test_results.TestResult")
+@patch("testgen.mcp.permissions._compute_project_permissions")
+def test_get_test_results_passes_project_codes(mock_compute, mock_result, db_session_mock):
+ mock_compute.return_value = ProjectPermissions(
+ memberships={"proj_a": "role_a"},
+ permission="view",
+ )
+ mock_result.select_results.return_value = []
+
+ from testgen.mcp.tools.test_results import get_test_results
+
+ get_test_results(str(uuid4()))
+
+ call_kwargs = mock_result.select_results.call_args.kwargs
+ assert call_kwargs["project_codes"] == ["proj_a"]
+
+
+@patch("testgen.mcp.tools.test_results.TestType")
+@patch("testgen.mcp.tools.test_results.TestResult")
+def test_get_failure_summary_by_test_type(mock_result, mock_tt_cls, db_session_mock):
+ mock_result.select_failures.return_value = [
+ ("Alpha_Trunc", TestResultStatus.Failed, 5),
+ ("Unique_Pct", TestResultStatus.Warning, 3),
+ ]
+ tt1 = MagicMock()
+ tt1.test_type = "Alpha_Trunc"
+ tt1.test_name_short = "Alpha Truncation"
+ tt2 = MagicMock()
+ tt2.test_type = "Unique_Pct"
+ tt2.test_name_short = "Unique Percent"
+ mock_tt_cls.select_where.return_value = [tt1, tt2]
+
+ from testgen.mcp.tools.test_results import get_failure_summary
+
+ result = get_failure_summary(str(uuid4()))
+
+ assert "Failed + Warning" in result
+ assert "8" in result
+ assert "Alpha Truncation" in result
+ assert "Alpha_Trunc" not in result
+ assert "Severity" in result
+ assert "Failed" in result
+ assert "Warning" in result
+ assert "get_test_type" in result
+
+
+@patch("testgen.mcp.tools.test_results.TestResult")
+def test_get_failure_summary_empty(mock_result, db_session_mock):
+ mock_result.select_failures.return_value = []
+
+ from testgen.mcp.tools.test_results import get_failure_summary
+
+ result = get_failure_summary(str(uuid4()))
+
+ assert "No confirmed failures" in result
+
+
+@patch("testgen.mcp.tools.test_results.TestResult")
+def test_get_failure_summary_by_table(mock_result, db_session_mock):
+ mock_result.select_failures.return_value = [("orders", 10)]
+
+ from testgen.mcp.tools.test_results import get_failure_summary
+
+ result = get_failure_summary(str(uuid4()), group_by="table")
+
+ assert "Table Name" in result
+ assert "orders" in result
+ assert "get_test_type" not in result
+
+
+@patch("testgen.mcp.tools.test_results.TestResult")
+def test_get_failure_summary_by_column(mock_result, db_session_mock):
+ mock_result.select_failures.return_value = [("orders", "total_value", 34), ("orders", None, 2)]
+
+ from testgen.mcp.tools.test_results import get_failure_summary
+
+ result = get_failure_summary(str(uuid4()), group_by="column")
+
+ assert "Column" in result
+ assert "`total_value` in `orders`" in result
+ assert "`orders` (table-level)" in result
+ assert "get_test_type" not in result
+
+
+def test_get_failure_summary_invalid_uuid(db_session_mock):
+ from testgen.mcp.tools.test_results import get_failure_summary
+
+ with pytest.raises(MCPUserError, match="not a valid UUID"):
+ get_failure_summary("bad-uuid")
+
+
+@patch("testgen.mcp.tools.test_results.TestResult")
+@patch("testgen.mcp.permissions._compute_project_permissions")
+def test_get_failure_summary_passes_project_codes(
+ mock_compute, mock_result, db_session_mock,
+):
+ mock_compute.return_value = ProjectPermissions(
+ memberships={"proj_a": "role_a"},
+ permission="view",
+ )
+ mock_result.select_failures.return_value = []
+
+ from testgen.mcp.tools.test_results import get_failure_summary
+
+ get_failure_summary(str(uuid4()))
+
+ call_kwargs = mock_result.select_failures.call_args.kwargs
+ assert call_kwargs["project_codes"] == ["proj_a"]
+
+
+@patch("testgen.mcp.tools.test_results.TestType")
+@patch("testgen.mcp.tools.test_results.TestResult")
+def test_get_test_result_history_basic(mock_result, mock_tt_cls, db_session_mock):
+ def_id = str(uuid4())
+ r1 = MagicMock()
+ r1.test_type = "Unique_Pct"
+ r1.table_name = "orders"
+ r1.column_names = "order_id"
+ r1.test_time = "2024-01-15T10:00:00"
+ r1.result_measure = "99.5"
+ r1.threshold_value = "95.0"
+ r1.status = TestResultStatus.Passed
+ r2 = MagicMock()
+ r2.test_type = "Unique_Pct"
+ r2.table_name = "orders"
+ r2.column_names = "order_id"
+ r2.test_time = "2024-01-10T10:00:00"
+ r2.result_measure = "88.0"
+ r2.threshold_value = "95.0"
+ r2.status = TestResultStatus.Failed
+ mock_result.select_history.return_value = [r1, r2]
+
+ tt = MagicMock()
+ tt.test_type = "Unique_Pct"
+ tt.test_name_short = "Unique Percent"
+ mock_tt_cls.select_where.return_value = [tt]
+
+ from testgen.mcp.tools.test_results import get_test_result_history
+
+ result = get_test_result_history(def_id)
+
+ assert "Unique Percent" in result
+ assert "Unique_Pct" not in result
+ assert "orders" in result
+ assert "99.5" in result
+ assert "88.0" in result
+ assert "Passed" in result
+ assert "Failed" in result
+
+
+@patch("testgen.mcp.tools.test_results.TestResult")
+def test_get_test_result_history_empty(mock_result, db_session_mock):
+ mock_result.select_history.return_value = []
+
+ from testgen.mcp.tools.test_results import get_test_result_history
+
+ result = get_test_result_history(str(uuid4()))
+
+ assert "No historical results" in result
+
+
+def test_get_test_result_history_invalid_uuid(db_session_mock):
+ from testgen.mcp.tools.test_results import get_test_result_history
+
+ with pytest.raises(MCPUserError, match="not a valid UUID"):
+ get_test_result_history("bad-uuid")
+
+
+@patch("testgen.mcp.tools.test_results.TestResult")
+@patch("testgen.mcp.permissions._compute_project_permissions")
+def test_get_test_result_history_passes_project_codes(
+ mock_compute, mock_result, db_session_mock,
+):
+ mock_compute.return_value = ProjectPermissions(
+ memberships={"proj_a": "role_a"},
+ permission="view",
+ )
+ mock_result.select_history.return_value = []
+
+ from testgen.mcp.tools.test_results import get_test_result_history
+
+ get_test_result_history(str(uuid4()))
+
+ call_kwargs = mock_result.select_history.call_args.kwargs
+ assert call_kwargs["project_codes"] == ["proj_a"]
diff --git a/tests/unit/mcp/test_tools_test_runs.py b/tests/unit/mcp/test_tools_test_runs.py
new file mode 100644
index 00000000..5b9a7d28
--- /dev/null
+++ b/tests/unit/mcp/test_tools_test_runs.py
@@ -0,0 +1,167 @@
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+import pytest
+
+from testgen.mcp.exceptions import MCPPermissionDenied
+from testgen.mcp.permissions import ProjectPermissions
+
+
+def _make_run_summary(**overrides):
+ defaults = {
+ "test_run_id": uuid4(), "test_suite": "Quality Suite", "project_name": "Demo",
+ "table_groups_name": "core_tables", "status": "Complete",
+ "test_starttime": "2024-01-15T10:00:00", "test_endtime": "2024-01-15T10:05:00",
+ "test_ct": 50, "passed_ct": 45, "failed_ct": 3, "warning_ct": 2, "error_ct": 0,
+ "log_ct": 0, "dismissed_ct": 0, "dq_score_testing": 92.5,
+ }
+ defaults.update(overrides)
+ return MagicMock(**defaults)
+
+
+@patch("testgen.mcp.tools.test_runs.TestRun")
+@patch("testgen.mcp.tools.test_runs.TestSuite")
+def test_get_recent_test_runs_default_limit(mock_suite, mock_run, db_session_mock):
+ """Default limit=1 returns one run per suite."""
+ runs = [_make_run_summary(test_run_id=uuid4()) for _ in range(7)]
+ mock_run.select_summary.return_value = runs
+
+ from testgen.mcp.tools.test_runs import get_recent_test_runs
+
+ result = get_recent_test_runs("demo")
+
+ # All 7 runs have test_suite="Quality Suite", so only 1 should appear
+ assert "1 run(s)" in result
+ assert "Quality Suite" in result
+ assert "92.5" in result
+ mock_run.select_summary.assert_called_once_with(project_code="demo", test_suite_id=None)
+
+
+@patch("testgen.mcp.tools.test_runs.TestRun")
+@patch("testgen.mcp.tools.test_runs.TestSuite")
+def test_get_recent_test_runs_custom_limit(mock_suite, mock_run, db_session_mock):
+ """Custom limit returns up to N runs per suite."""
+ runs = [_make_run_summary() for _ in range(3)]
+ mock_run.select_summary.return_value = runs
+
+ from testgen.mcp.tools.test_runs import get_recent_test_runs
+
+ result = get_recent_test_runs("demo", limit=10)
+
+ assert "3 run(s)" in result
+
+
+@patch("testgen.mcp.tools.test_runs.TestRun")
+@patch("testgen.mcp.tools.test_runs.TestSuite")
+def test_get_recent_test_runs_per_suite_grouping(mock_suite, mock_run, db_session_mock):
+ """With multiple suites, returns limit runs per suite."""
+ runs = [
+ _make_run_summary(test_suite="Suite A", test_run_id=uuid4()),
+ _make_run_summary(test_suite="Suite A", test_run_id=uuid4()),
+ _make_run_summary(test_suite="Suite B", test_run_id=uuid4()),
+ _make_run_summary(test_suite="Suite B", test_run_id=uuid4()),
+ ]
+ mock_run.select_summary.return_value = runs
+
+ from testgen.mcp.tools.test_runs import get_recent_test_runs
+
+ result = get_recent_test_runs("demo")
+
+ # limit=1 (default), so 1 per suite = 2 total
+ assert "2 run(s)" in result
+ assert "Suite A" in result
+ assert "Suite B" in result
+
+
+@patch("testgen.mcp.tools.test_runs.TestRun")
+@patch("testgen.mcp.tools.test_runs.TestSuite")
+def test_get_recent_test_runs_with_suite_name(mock_suite, mock_run, db_session_mock):
+ suite_id = uuid4()
+ suite_minimal = MagicMock()
+ suite_minimal.id = suite_id
+ mock_suite.select_minimal_where.return_value = [suite_minimal]
+ mock_run.select_summary.return_value = [_make_run_summary(test_suite="My Suite")]
+
+ from testgen.mcp.tools.test_runs import get_recent_test_runs
+
+ result = get_recent_test_runs("demo", test_suite="My Suite")
+
+ mock_run.select_summary.assert_called_once_with(project_code="demo", test_suite_id=str(suite_id))
+ assert "My Suite" in result
+
+
+@patch("testgen.mcp.tools.test_runs.TestRun")
+@patch("testgen.mcp.tools.test_runs.TestSuite")
+def test_get_recent_test_runs_suite_not_found(mock_suite, mock_run, db_session_mock):
+ mock_suite.select_minimal_where.return_value = []
+
+ from testgen.mcp.tools.test_runs import get_recent_test_runs
+
+ result = get_recent_test_runs("demo", test_suite="Nonexistent")
+
+ assert "not found" in result
+ mock_run.select_summary.assert_not_called()
+
+
+@patch("testgen.mcp.tools.test_runs.TestRun")
+@patch("testgen.mcp.tools.test_runs.TestSuite")
+def test_get_recent_test_runs_no_runs(mock_suite, mock_run, db_session_mock):
+ mock_run.select_summary.return_value = []
+
+ from testgen.mcp.tools.test_runs import get_recent_test_runs
+
+ result = get_recent_test_runs("demo")
+
+ assert "No completed test runs" in result
+
+
+@patch("testgen.mcp.tools.test_runs.TestRun")
+@patch("testgen.mcp.tools.test_runs.TestSuite")
+def test_get_recent_test_runs_shows_failure_counts(mock_suite, mock_run, db_session_mock):
+ mock_run.select_summary.return_value = [_make_run_summary(failed_ct=5, warning_ct=2)]
+
+ from testgen.mcp.tools.test_runs import get_recent_test_runs
+
+ result = get_recent_test_runs("demo")
+
+ assert "5 failed" in result
+ assert "2 warnings" in result
+
+
+def test_get_recent_test_runs_empty_project_code(db_session_mock):
+ from testgen.mcp.tools.test_runs import get_recent_test_runs
+
+ result = get_recent_test_runs("")
+
+ assert "Missing required parameter" in result
+ assert "project_code" in result
+
+
+@patch("testgen.mcp.permissions._compute_project_permissions")
+def test_get_recent_test_runs_raises_not_found_for_inaccessible_project(
+ mock_compute, db_session_mock,
+):
+ mock_compute.return_value = ProjectPermissions(
+ memberships={"other_project": "role_a"},
+ permission="view",
+ )
+
+ from testgen.mcp.tools.test_runs import get_recent_test_runs
+
+ with pytest.raises(MCPPermissionDenied, match="No completed test runs found in project `secret_project`"):
+ get_recent_test_runs("secret_project")
+
+
+@patch("testgen.mcp.permissions._compute_project_permissions")
+def test_get_recent_test_runs_raises_denial_for_insufficient_permission(
+ mock_compute, db_session_mock,
+):
+ mock_compute.return_value = ProjectPermissions(
+ memberships={"other_project": "role_a", "secret_project": "role_c"},
+ permission="view",
+ )
+
+ from testgen.mcp.tools.test_runs import get_recent_test_runs
+
+ with pytest.raises(MCPPermissionDenied, match="necessary permission"):
+ get_recent_test_runs("secret_project")
diff --git a/tests/unit/scheduler/test_scheduler_base.py b/tests/unit/scheduler/test_scheduler_base.py
index ccac8374..ab0445e2 100644
--- a/tests/unit/scheduler/test_scheduler_base.py
+++ b/tests/unit/scheduler/test_scheduler_base.py
@@ -138,7 +138,7 @@ def wait_for_call_count(mock, expected_count, timeout=0.5):
@pytest.mark.parametrize("with_job", (True, False))
def test_reloads_and_shutdowns_immediately(with_job, scheduler_instance, base_time):
- jobs = [Job(cron_expr="0 0 * * *", cron_tz="UTC", delayed_policy=DelayedPolicy.ALL)] if with_job else []
+ jobs = [Job(cron_expr="0 0 * * *", cron_tz="UTC", delayed_policy=DelayedPolicy.SKIP)] if with_job else []
scheduler_instance.get_jobs.return_value = jobs
scheduler_instance.start(base_time)
@@ -169,8 +169,8 @@ def test_job_start_is_called(start_side_effect, scheduler_instance, base_time, n
scheduler_instance.start(base_time)
for multiplier in (1, 2):
- while scheduler_instance.start_job.call_count != 6 * multiplier:
- time.sleep(0.01)
+ assert wait_for_call_count(scheduler_instance.start_job, 6 * multiplier, timeout=5.0), \
+ f"start_job call_count={scheduler_instance.start_job.call_count}, expected {6 * multiplier}"
assert scheduler_instance.get_jobs.call_count == multiplier
assert get_next_mock.call_count == multiplier
diff --git a/tests/unit/ui/__init__.py b/tests/unit/ui/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/unit/ui/conftest.py b/tests/unit/ui/conftest.py
new file mode 100644
index 00000000..1aa05727
--- /dev/null
+++ b/tests/unit/ui/conftest.py
@@ -0,0 +1,7 @@
+import sys
+from unittest.mock import MagicMock
+
+# Mock the Streamlit component registration that fails outside a running Streamlit app.
+# The testgen_component module triggers component registration at import time, which
+# requires a Streamlit runtime. We mock it so pure-logic tests can import freely.
+sys.modules.setdefault("testgen.ui.components.widgets.testgen_component", MagicMock())
diff --git a/tests/unit/ui/test_import_metadata.py b/tests/unit/ui/test_import_metadata.py
new file mode 100644
index 00000000..037f9278
--- /dev/null
+++ b/tests/unit/ui/test_import_metadata.py
@@ -0,0 +1,328 @@
+import base64
+
+import pandas as pd
+import pytest
+
+from testgen.ui.views.dialogs.import_metadata_dialog import (
+ DESCRIPTION_MAX_LENGTH,
+ TAG_MAX_LENGTH,
+ _build_preview_props,
+ _extract_metadata_fields,
+ _parse_csv,
+ _set_row_status,
+ _truncate_fields,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def _make_base64_csv(csv_text: str) -> str:
+ encoded = base64.b64encode(csv_text.encode()).decode()
+ return f"data:text/csv;base64,{encoded}"
+
+
+def _make_series(data: dict) -> pd.Series:
+ return pd.Series(data)
+
+
+# --- _parse_csv ---
+
+
+def test_parse_csv_basic_table_and_column():
+ content = _make_base64_csv("Table,Column,Description\nmy_table,,Table desc\nmy_table,col1,Col desc\n")
+ result = _parse_csv(content)
+ assert "error" not in result
+ df = result["df"]
+ assert len(df) == 2
+ assert list(df["table_name"]) == ["my_table", "my_table"]
+ assert list(df["column_name"]) == ["", "col1"]
+
+
+def test_parse_csv_missing_table_column():
+ content = _make_base64_csv("Column,Description\ncol1,desc\n")
+ result = _parse_csv(content)
+ assert result["error"] == "CSV must contain a 'Table' column."
+
+
+def test_parse_csv_empty():
+ content = _make_base64_csv("Table,Column\n")
+ result = _parse_csv(content)
+ assert result["error"] == "CSV file is empty."
+
+
+def test_parse_csv_invalid_base64():
+ result = _parse_csv("data:text/csv;base64,!!!invalid!!!")
+ assert "error" in result
+ assert "Could not parse CSV file" in result["error"]
+
+
+def test_parse_csv_header_normalization_underscores():
+ content = _make_base64_csv("Table,Critical_Data_Element\nmy_table,Yes\n")
+ result = _parse_csv(content)
+ assert "error" not in result
+ assert "critical_data_element" in result["df"].columns
+
+
+def test_parse_csv_header_normalization_spaces():
+ content = _make_base64_csv("Table,Critical Data Element\nmy_table,Yes\n")
+ result = _parse_csv(content)
+ assert "error" not in result
+ assert "critical_data_element" in result["df"].columns
+
+
+def test_parse_csv_header_cde_alias():
+ content = _make_base64_csv("Table,CDE\nmy_table,Yes\n")
+ result = _parse_csv(content)
+ assert "error" not in result
+ assert "critical_data_element" in result["df"].columns
+
+
+def test_parse_csv_header_case_insensitive():
+ content = _make_base64_csv("TABLE,DESCRIPTION\nmy_table,desc\n")
+ result = _parse_csv(content)
+ assert "error" not in result
+ assert "description" in result["df"].columns
+
+
+def test_parse_csv_extra_columns_ignored():
+ content = _make_base64_csv("Table,Description,UnknownCol\nmy_table,desc,ignored\n")
+ result = _parse_csv(content)
+ assert "error" not in result
+ assert "UnknownCol" not in result["df"].columns
+
+
+def test_parse_csv_whitespace_stripped():
+ content = _make_base64_csv("Table,Description\n my_table , desc \n")
+ result = _parse_csv(content)
+ df = result["df"]
+ assert df.iloc[0]["table_name"] == "my_table"
+ assert df.iloc[0]["description"] == "desc"
+
+
+def test_parse_csv_duplicate_table_rows():
+ content = _make_base64_csv("Table,Description\nmy_table,first\nmy_table,second\n")
+ result = _parse_csv(content)
+ assert len(result["duplicate_rows"]) == 1
+ assert len(result["df"]) == 1
+ assert result["df"].iloc[0]["description"] == "second"
+
+
+def test_parse_csv_duplicate_column_rows():
+ content = _make_base64_csv("Table,Column,Description\nt,c,first\nt,c,second\n")
+ result = _parse_csv(content)
+ assert len(result["duplicate_rows"]) == 1
+ assert result["df"].iloc[0]["description"] == "second"
+
+
+def test_parse_csv_no_column_header_adds_empty():
+ content = _make_base64_csv("Table,Description\nmy_table,desc\n")
+ result = _parse_csv(content)
+ assert "column_name" in result["df"].columns
+ assert result["df"].iloc[0]["column_name"] == ""
+
+
+# --- _extract_metadata_fields ---
+
+
+@pytest.mark.parametrize("val", ["Yes", "yes", "Y", "y", "True", "true", "1"])
+def test_extract_cde_true_values(val):
+ fields, bad_cde, *_ = _extract_metadata_fields(_make_series({"critical_data_element": val}), "keep")
+ assert fields["critical_data_element"] is True
+ assert bad_cde == 0
+
+
+@pytest.mark.parametrize("val", ["No", "no", "N", "n", "False", "false", "0"])
+def test_extract_cde_false_values(val):
+ fields, bad_cde, *_ = _extract_metadata_fields(_make_series({"critical_data_element": val}), "keep")
+ assert fields["critical_data_element"] is False
+ assert bad_cde == 0
+
+
+def test_extract_cde_blank_keep():
+ fields, bad_cde, *_ = _extract_metadata_fields(_make_series({"critical_data_element": ""}), "keep")
+ assert "critical_data_element" not in fields
+ assert bad_cde == 0
+
+
+def test_extract_cde_blank_clear():
+ fields, bad_cde, *_ = _extract_metadata_fields(_make_series({"critical_data_element": ""}), "clear")
+ assert fields["critical_data_element"] is None
+ assert bad_cde == 0
+
+
+def test_extract_cde_unrecognized():
+ fields, bad_cde, *_ = _extract_metadata_fields(_make_series({"critical_data_element": "Maybe"}), "keep")
+ assert "critical_data_element" not in fields
+ assert bad_cde == 1
+
+
+def test_extract_text_field_with_value():
+ fields, *_ = _extract_metadata_fields(_make_series({"description": "test desc"}), "keep")
+ assert fields["description"] == "test desc"
+
+
+def test_extract_text_field_blank_keep():
+ fields, *_ = _extract_metadata_fields(_make_series({"description": ""}), "keep")
+ assert "description" not in fields
+
+
+def test_extract_text_field_blank_clear():
+ fields, *_ = _extract_metadata_fields(_make_series({"description": ""}), "clear")
+ assert fields["description"] == ""
+
+
+def test_extract_missing_column_skipped():
+ fields, *_ = _extract_metadata_fields(_make_series({"description": "test"}), "keep")
+ assert "data_source" not in fields
+
+
+def test_extract_tag_field_with_value():
+ fields, *_ = _extract_metadata_fields(_make_series({"data_source": "ERP"}), "keep")
+ assert fields["data_source"] == "ERP"
+
+
+# --- _truncate_fields ---
+
+
+def test_truncate_no_truncation_needed():
+ fields = {"description": "short", "data_source": "ERP"}
+ result, truncated = _truncate_fields(fields)
+ assert truncated == []
+ assert result["description"] == "short"
+
+
+def test_truncate_tag_at_max():
+ fields = {"data_source": "x" * (TAG_MAX_LENGTH + 10)}
+ result, truncated = _truncate_fields(fields)
+ assert truncated == ["data_source"]
+ assert len(result["data_source"]) == TAG_MAX_LENGTH
+
+
+def test_truncate_description_at_max():
+ fields = {"description": "x" * (DESCRIPTION_MAX_LENGTH + 10)}
+ result, truncated = _truncate_fields(fields)
+ assert truncated == ["description"]
+ assert len(result["description"]) == DESCRIPTION_MAX_LENGTH
+
+
+def test_truncate_boolean_fields_skipped():
+ fields = {"critical_data_element": True}
+ result, truncated = _truncate_fields(fields)
+ assert truncated == []
+ assert result["critical_data_element"] is True
+
+
+def test_truncate_multiple_fields():
+ fields = {"data_source": "x" * 50, "source_system": "y" * 50}
+ _, truncated = _truncate_fields(fields)
+ assert "data_source" in truncated
+ assert "source_system" in truncated
+
+
+# --- _set_row_status ---
+
+
+def test_set_row_status_ok():
+ row = {}
+ _set_row_status(row, bad_cde=False, bad_xde=False, bad_pii=False, truncated=[])
+ assert row["_status"] == "ok"
+ assert row["_status_detail"] == ""
+ assert row["_truncated_fields"] == []
+
+
+def test_set_row_status_error_bad_cde():
+ row = {}
+ _set_row_status(row, bad_cde=True, bad_xde=False, bad_pii=False, truncated=[])
+ assert row["_status"] == "error"
+ assert "Unrecognized CDE" in row["_status_detail"]
+
+
+def test_set_row_status_warning_truncated():
+ row = {}
+ _set_row_status(row, bad_cde=False, bad_xde=False, bad_pii=False, truncated=["data_source"])
+ assert row["_status"] == "warning"
+ assert "truncated" in row["_status_detail"]
+ assert "data_source" in row["_status_detail"]
+
+
+def test_set_row_status_error_precedence():
+ row = {}
+ _set_row_status(row, bad_cde=True, bad_xde=False, bad_pii=False, truncated=["data_source"])
+ assert row["_status"] == "error"
+ assert "CDE" in row["_status_detail"]
+ assert "truncated" in row["_status_detail"]
+
+
+# --- _build_preview_props ---
+
+
+def test_preview_props_basic():
+ preview = {
+ "table_rows": [{"table_id": "1", "table_name": "t1", "description": "desc"}],
+ "column_rows": [],
+ "preview_rows": [
+ {"table_name": "t1", "column_name": "", "description": "desc", "_status": "ok", "_status_detail": "", "_truncated_fields": []},
+ ],
+ "metadata_columns": ["description"],
+ "matched_tables": 1,
+ "matched_columns": 0,
+ "skipped_count": 0,
+ }
+ result = _build_preview_props(preview)
+ assert result["table_count"] == 1
+ assert result["column_count"] == 0
+ assert result["skipped_count"] == 0
+ assert len(result["preview_rows"]) == 1
+ assert result["preview_rows"][0]["description"] == "desc"
+
+
+def test_preview_props_cde_true():
+ preview = {
+ "table_rows": [{"table_id": "1", "table_name": "t", "critical_data_element": True}],
+ "column_rows": [],
+ "preview_rows": [
+ {"table_name": "t", "column_name": "", "critical_data_element": True, "_status": "ok", "_status_detail": "", "_truncated_fields": []},
+ ],
+ "metadata_columns": ["critical_data_element"],
+ }
+ result = _build_preview_props(preview)
+ assert result["preview_rows"][0]["critical_data_element"] == "Yes"
+
+
+def test_preview_props_cde_false():
+ preview = {
+ "table_rows": [{"table_id": "1", "table_name": "t", "critical_data_element": False}],
+ "column_rows": [],
+ "preview_rows": [
+ {"table_name": "t", "column_name": "", "critical_data_element": False, "_status": "ok", "_status_detail": "", "_truncated_fields": []},
+ ],
+ "metadata_columns": ["critical_data_element"],
+ }
+ result = _build_preview_props(preview)
+ assert result["preview_rows"][0]["critical_data_element"] == "No"
+
+
+def test_preview_props_cde_none():
+ preview = {
+ "table_rows": [],
+ "column_rows": [],
+ "preview_rows": [
+ {"table_name": "t", "column_name": "", "critical_data_element": None, "_status": "ok", "_status_detail": "", "_truncated_fields": []},
+ ],
+ "metadata_columns": ["critical_data_element"],
+ }
+ result = _build_preview_props(preview)
+ assert result["preview_rows"][0]["critical_data_element"] == ""
+
+
+def test_preview_props_unmatched_preserved():
+ preview = {
+ "table_rows": [],
+ "column_rows": [],
+ "preview_rows": [
+ {"table_name": "fake", "column_name": "", "_status": "unmatched", "_status_detail": "Table not found", "_truncated_fields": []},
+ ],
+ "metadata_columns": ["description"],
+ }
+ result = _build_preview_props(preview)
+ assert result["preview_rows"][0]["_status"] == "unmatched"