datafaker/create.py: 28 changes (10 additions, 18 deletions)

```diff
@@ -11,7 +11,7 @@
 from sqlalchemy.schema import CreateColumn, CreateSchema, CreateTable, MetaData, Table
 
 from datafaker.base import FileUploader, TableGenerator
-from datafaker.settings import get_settings
+from datafaker.settings import get_destination_dsn, get_destination_schema
 from datafaker.utils import (
     create_db_engine,
     get_sync_engine,
@@ -60,15 +60,12 @@ def remove_on_delete_cascade(element: CreateTable, compiler: Any, **kw: Any) ->
 
 def create_db_tables(metadata: MetaData) -> None:
     """Create tables described by the sqlalchemy metadata object."""
-    settings = get_settings()
-    dst_dsn: str = settings.dst_dsn or ""
-    assert dst_dsn != "", "Missing DST_DSN setting."
-
+    dst_dsn = get_destination_dsn()
     engine = get_sync_engine(create_db_engine(dst_dsn))
+    schema_name = get_destination_schema()
 
     # Create schema, if necessary.
-    if settings.dst_schema:
-        schema_name = settings.dst_schema
+    if schema_name is not None:
         with engine.connect() as connection:
             # Do not try to create a schema if the schema already exists.
             # This is necessary if the user does not have schema creation privileges
@@ -97,12 +94,11 @@ def create_db_vocab(
     :param config: The configuration from --config-file
     :return: List of table names loaded.
     """
-    settings = get_settings()
-    dst_dsn: str = settings.dst_dsn or ""
-    assert dst_dsn != "", "Missing DST_DSN setting."
-
     dst_engine = get_sync_engine(
-        create_db_engine(dst_dsn, schema_name=settings.dst_schema)
+        create_db_engine(
+            get_destination_dsn(),
+            schema_name=get_destination_schema(),
+        )
     )
 
     tables_loaded: list[str] = []
@@ -137,16 +133,12 @@ def create_db_data(
     metadata: MetaData,
 ) -> RowCounts:
     """Connect to a database and populate it with data."""
-    settings = get_settings()
-    dst_dsn: str = settings.dst_dsn or ""
-    assert dst_dsn != "", "Missing DST_DSN setting."
-
     return create_db_data_into(
         sorted_tables,
         df_module,
        num_passes,
-        dst_dsn,
-        settings.dst_schema,
+        get_destination_dsn(),
+        get_destination_schema(),
         metadata,
     )
```
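The `get_destination_dsn` and `get_destination_schema` helpers (and their source-side counterparts) are imported from `datafaker.settings`, but their definitions are not part of this diff. Judging by the `get_settings()` + `assert` pattern they replace and by the new `SettingsError` handling in `main.py`, a minimal sketch of what they presumably look like is below; the exact signatures and message texts are assumptions:

```python
# datafaker/settings.py (hypothetical excerpt, not shown in this diff).
# Centralises the DSN/schema checks that used to be asserts at each call site.
# get_settings() is the module's existing Settings accessor.


class SettingsError(Exception):
    """Raised when a required setting is missing."""


def get_destination_dsn() -> str:
    """Return DST_DSN, raising SettingsError instead of asserting."""
    dsn = get_settings().dst_dsn
    if not dsn:
        raise SettingsError("Missing DST_DSN setting.")
    return dsn


def get_destination_schema() -> str | None:
    """Return the destination schema name, or None if not configured."""
    return get_settings().dst_schema


def get_source_dsn() -> str:
    """Return SRC_DSN, raising SettingsError instead of asserting."""
    dsn = get_settings().src_dsn
    if not dsn:
        raise SettingsError("Missing SRC_DSN setting.")
    return dsn


def get_source_schema() -> str | None:
    """Return the source schema name, or None if not configured."""
    return get_settings().src_schema
```

One practical benefit of this shape: a missing DSN now surfaces as a catchable `SettingsError` rather than an `AssertionError`, so the check cannot be silently stripped by running Python with `-O`.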
datafaker/main.py: 83 changes (56 additions, 27 deletions)

```diff
@@ -13,6 +13,7 @@
 from jsonschema.exceptions import ValidationError
 from jsonschema.validators import validate
 from sqlalchemy import MetaData, Table
+from sqlalchemy.exc import InternalError, OperationalError
 from typer import Argument, Exit, Option, Typer
 
 from datafaker.create import create_db_data, create_db_tables, create_db_vocab
@@ -34,7 +35,14 @@
     make_vocabulary_tables,
 )
 from datafaker.remove import remove_db_data, remove_db_tables, remove_db_vocab
-from datafaker.settings import Settings, get_settings
+from datafaker.settings import (
+    Settings,
+    SettingsError,
+    get_destination_dsn,
+    get_destination_schema,
+    get_source_dsn,
+    get_source_schema,
+)
 from datafaker.utils import (
     CONFIG_SCHEMA_PATH,
     conf_logger,
@@ -59,6 +67,19 @@
 app = Typer(no_args_is_help=True)
 
 
+def datafaker() -> None:
+    """Run the app and catch internal exceptions."""
+    try:
+        app()
+    except OperationalError as exc:
+        logger.error(str(exc))
+        # Outside of app() typer.Exit(1) doesn't work
+        sys.exit(1)
+    except SettingsError as exc:
+        logger.error(str(exc))
+        sys.exit(1)
+
+
 def _check_file_non_existence(file_path: Path) -> None:
     """Check that a given file does not exist. Exit with an error message if it does."""
     if file_path.exists():
@@ -294,9 +315,6 @@ def make_vocab(
     Example:
        $ datafaker make-vocab --config-file config.yml
     """
-    settings = get_settings()
-    _require_src_db_dsn(settings)
-
     generator_config = read_config_file(config_file) if config_file is not None else {}
     orm_metadata = load_metadata(orm_file, generator_config)
     make_vocabulary_tables(
@@ -331,11 +349,12 @@ def make_stats(
 
     config = read_config_file(config_file) if config_file is not None else {}
 
-    settings = get_settings()
-    src_dsn: str = _require_src_db_dsn(settings)
-
     src_stats = asyncio.get_event_loop().run_until_complete(
-        make_src_stats(src_dsn, config, settings.src_schema)
+        make_src_stats(
+            get_source_dsn(),
+            config,
+            get_source_schema(),
+        )
     )
     stats_file_path.write_text(yaml.dump(src_stats), encoding="utf-8")
     logger.debug("%s created.", stats_file)
@@ -369,10 +388,11 @@ def make_tables(
     if not force:
         _check_file_non_existence(orm_file_path)
 
-    settings = get_settings()
-    src_dsn: str = _require_src_db_dsn(settings)
-
-    content = make_tables_file(src_dsn, settings.src_schema, parquet_dir)
+    content = make_tables_file(
+        get_source_dsn(),
+        get_source_schema(),
+        parquet_dir,
+    )
     orm_file_path.write_text(content, encoding="utf-8")
     logger.debug("%s created.", orm_file)
 
@@ -386,8 +406,6 @@ def configure_tables(
 ) -> None:
     """Interactively set tables to ignored, vocabulary or primary private."""
     logger.debug("Configuring tables in %s.", config_file)
-    settings = get_settings()
-    src_dsn: str = _require_src_db_dsn(settings)
     config_file_path = Path(config_file)
     config = {}
     if config_file_path.exists():
@@ -397,7 +415,10 @@
     # we don't pass config here so that no tables are ignored
     metadata = load_metadata(orm_file)
     config_updated = update_config_tables(
-        src_dsn, settings.src_schema, metadata, config
+        get_source_dsn(),
+        get_source_schema(),
+        metadata,
+        config,
     )
     if config_updated is None:
         logger.debug("Cancelled")
@@ -416,8 +437,6 @@ def configure_missing(
 ) -> None:
     """Interactively set the missingness of the generated data."""
     logger.debug("Configuring missingness in %s.", config_file)
-    settings = get_settings()
-    src_dsn: str = _require_src_db_dsn(settings)
     config_file_path = Path(config_file)
     config: dict[str, Any] = {}
     if config_file_path.exists():
@@ -427,7 +446,12 @@
     if isinstance(config_any, dict):
         config = config_any
     metadata = load_metadata(orm_file, config)
-    config_updated = update_missingness(src_dsn, settings.src_schema, metadata, config)
+    config_updated = update_missingness(
+        get_source_dsn(),
+        get_source_schema(),
+        metadata,
+        config,
+    )
     if config_updated is None:
         logger.debug("Cancelled")
         return
@@ -452,8 +476,6 @@ def configure_generators(
 ) -> None:
     """Interactively set generators for column data."""
     logger.debug("Configuring generators in %s.", config_file)
-    settings = get_settings()
-    src_dsn: str = _require_src_db_dsn(settings)
     config_file_path = Path(config_file)
     config = {}
     if config_file_path.exists():
@@ -462,7 +484,11 @@
         )
     metadata = load_metadata(orm_file)
     config_updated = update_config_generators(
-        src_dsn, settings.src_schema, metadata, config, spec_path=spec
+        get_source_dsn(),
+        get_source_schema(),
+        metadata,
+        config,
+        spec_path=spec,
     )
     if config_updated is None:
         logger.debug("Cancelled")
@@ -576,10 +602,8 @@ def dump_data(
             " specified, or specify an existing directory"
         )
         sys.exit(1)
-    settings = get_settings()
-    dst_dsn: str = settings.dst_dsn or ""
-    assert dst_dsn != "", "Missing DST_DSN setting."
-    schema_name = settings.dst_schema
+    dst_dsn = get_destination_dsn()
+    schema_name = get_destination_schema()
     config = read_config_file(config_file) if config_file is not None else {}
     metadata = load_metadata_for_output(orm_file, config)
     mtables = convert_table_names_to_tables(table, metadata)
@@ -677,7 +701,12 @@ def remove_tables(
         else:
             config = read_config_file(config_file)
         metadata = load_metadata_for_output(orm_file, config)
-        remove_db_tables(metadata)
+        try:
+            remove_db_tables(metadata)
+        except InternalError as exc:
+            logger.error("Failed to drop tables: %s", exc)
+            logger.error("Please try again using the --all option.")
+            sys.exit(1)
         logger.debug("Tables dropped.")
     else:
         logger.info("Would remove tables if called with --yes.")
@@ -727,4 +756,4 @@ def version() -> None:
 
 
 if __name__ == "__main__":
-    app()
+    datafaker()
```
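One packaging detail worth noting: the `__main__` guard above only covers direct invocation of the module. For an installed `datafaker` command to gain the new exception handling, the console-script entry point must also target the wrapper. The packaging metadata is not shown in this diff, but under a setuptools layout the relevant piece would look roughly like this (the project's actual packaging config is an assumption):

```python
# setup.py (hypothetical): point the console script at the wrapper,
# not the bare Typer app, so OperationalError/SettingsError are caught.
from setuptools import setup

setup(
    name="datafaker",
    entry_points={
        "console_scripts": [
            "datafaker = datafaker.main:datafaker",
        ],
    },
)
```

The equivalent `[project.scripts]` entry applies if the project declares its metadata in `pyproject.toml` instead.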
datafaker/make.py: 16 changes (9 additions, 7 deletions)

```diff
@@ -24,7 +24,7 @@
 
 from datafaker import providers
 from datafaker.parquet2orm import get_parquet_orm
-from datafaker.settings import get_settings
+from datafaker.settings import get_source_dsn, get_source_schema
 from datafaker.utils import (
     MaybeAsyncEngine,
     create_db_engine,
@@ -453,11 +453,12 @@ def _get_provider_for_column(column: Column) -> Tuple[list[str], str, dict[str,
     if not generator_function:
         generator_function = "generic.null_provider.null"
         logger.warning(
-            "Unsupported SQLAlchemy type %s for column %s. "
+            "Unsupported SQLAlchemy type %s for column %s of table %s. "
             "Setting this column to NULL always, "
             "you may want to configure a row generator for it instead.",
             column.type,
             column.name,
+            column.table.name,
         )
 
     return variable_names, generator_function, generator_arguments
@@ -551,11 +552,12 @@ def make_vocabulary_tables(
     table_names: set[str] | None = None,
 ) -> None:
     """Extract the data from the source database for each vocabulary table."""
-    settings = get_settings()
-    src_dsn: str = settings.src_dsn or ""
-    assert src_dsn != "", "Missing SRC_DSN setting."
-
-    engine = get_sync_engine(create_db_engine(src_dsn, schema_name=settings.src_schema))
+    engine = get_sync_engine(
+        create_db_engine(
+            get_source_dsn(),
+            schema_name=get_source_schema(),
+        )
+    )
     vocab_names = get_vocabulary_table_names(config)
     if table_names is None:
         table_names = vocab_names
```
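The extra `column.table.name` argument in `_get_provider_for_column` makes the NULL-fallback warning traceable to a specific table, which matters when several tables share a column name. With an invented unsupported type and invented table/column names, the updated message would render along these lines:

```
Unsupported SQLAlchemy type JSON for column metadata of table observations. Setting this column to NULL always, you may want to configure a row generator for it instead.
```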
datafaker/remove.py: 23 changes (13 additions, 10 deletions)

```diff
@@ -3,7 +3,7 @@
 
 from sqlalchemy import MetaData, delete
 
-from datafaker.settings import get_settings
+from datafaker.settings import get_destination_dsn, get_destination_schema
 from datafaker.utils import (
     create_db_engine,
     get_sync_engine,
@@ -17,10 +17,11 @@
 
 def remove_db_data(metadata: MetaData, config: Mapping[str, Any]) -> None:
     """Truncate the synthetic data tables but not the vocabularies."""
-    settings = get_settings()
-    assert settings.dst_dsn, "Missing destination database settings"
     remove_db_data_from(
-        metadata, config, settings.dst_dsn, schema_name=settings.dst_schema
+        metadata,
+        config,
+        get_destination_dsn(),
+        schema_name=get_destination_schema(),
     )
 
 
@@ -41,10 +42,11 @@ def remove_db_vocab(
     metadata: MetaData, meta_dict: Mapping[str, Any], config: Mapping[str, Any]
 ) -> None:
     """Truncate the vocabulary tables."""
-    settings = get_settings()
-    assert settings.dst_dsn, "Missing destination database settings"
     dst_engine = get_sync_engine(
-        create_db_engine(settings.dst_dsn, schema_name=settings.dst_schema)
+        create_db_engine(
+            get_destination_dsn(),
+            schema_name=get_destination_schema(),
+        )
     )
 
     with dst_engine.connect() as dst_conn:
@@ -58,10 +60,11 @@
 
 def remove_db_tables(metadata: Optional[MetaData]) -> None:
     """Drop the tables in the destination schema."""
-    settings = get_settings()
-    assert settings.dst_dsn, "Missing destination database settings"
     dst_engine = get_sync_engine(
-        create_db_engine(settings.dst_dsn, schema_name=settings.dst_schema)
+        create_db_engine(
+            get_destination_dsn(),
+            schema_name=get_destination_schema(),
+        )
     )
     if metadata is None:
         metadata = MetaData()
```