-
Notifications
You must be signed in to change notification settings - Fork 67
Expand file tree
/
Copy pathnaming.py
More file actions
125 lines (84 loc) · 4.34 KB
/
naming.py
File metadata and controls
125 lines (84 loc) · 4.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# (C) 2021 GoodData Corporation
from __future__ import annotations
from typing import Optional
import gooddata_sdk as sdk
def _sanitize_str_for_postgres(string: str, used_names: Optional[dict[str, bool]] = None) -> str:
# replace non-alpha-num stuff with underscores
with_underscores = "".join(char if char.isalnum() else "_" for char in string.lower())
# then get rid of sequences of underscores
candidate = "_".join([s for s in with_underscores.split("_") if s != ""])
if used_names is None:
return candidate
return _ensure_unique(candidate, used_names)
def _ensure_unique(candidate: str, used_names: dict[str, bool]) -> str:
# ensure column name uniqueness - in a dumb way by appending some number
if candidate in used_names:
i = 1
new_candidate = f"{candidate}_{i}"
while new_candidate in used_names:
i += 1
new_candidate = f"{candidate}_{i}"
return new_candidate
return candidate
#
# Table and column naming during IMPORT SCHEMA is delegated to strategies. The idea is that we may want to support
# different strategies and let the user select the desired one through OPTIONS during import.
#
class InsightTableNamingStrategy:
    """Interface of strategies that choose a table name for an insight."""

    def table_name_for_insight(self, insight: sdk.Visualization) -> str:
        """Return the table name to use for `insight`.

        The base implementation always raises; concrete strategies override it.

        :raises NotImplementedError: always
        """
        raise NotImplementedError
class DefaultInsightTableNaming(InsightTableNamingStrategy):
    """Names insight tables after the sanitized insight title.

    Each instance remembers the names it has returned, so two insights whose
    titles sanitize to the same text get distinct table names.
    """

    def __init__(self) -> None:
        # names handed out so far; only the keys matter
        self._uniques: dict[str, bool] = {}

    def table_name_for_insight(self, insight: sdk.Visualization) -> str:
        """Return a table name derived from ``insight.title``, unique per instance."""
        table_name = _sanitize_str_for_postgres(insight.title, self._uniques)
        # reserve the name so that later calls cannot return it again
        self._uniques[table_name] = True

        return table_name
class InsightColumnNamingStrategy:
    """Interface of strategies that choose column names for an insight's attributes and metrics."""

    def col_name_for_attribute(self, attr: sdk.VisualizationAttribute) -> str:
        """Return the column name to use for `attr`.

        :raises NotImplementedError: always; concrete strategies override this
        """
        raise NotImplementedError

    def col_name_for_metric(self, metric: sdk.VisualizationMetric) -> str:
        """Return the column name to use for `metric`.

        :raises NotImplementedError: always; concrete strategies override this
        """
        raise NotImplementedError
class DefaultInsightColumnNaming(InsightColumnNamingStrategy):
    """Names insight columns after sanitized attribute / metric identifiers.

    Attributes and metrics share one registry of used names per instance, so
    a metric can never receive the same column name as an attribute.
    """

    def __init__(self) -> None:
        # names handed out so far; only the keys matter
        self._uniques: dict[str, bool] = {}

    def _reserve(self, raw: str) -> str:
        # sanitize `raw`, make it unique within this instance and remember it
        column = _sanitize_str_for_postgres(raw, self._uniques)
        self._uniques[column] = True

        return column

    def col_name_for_attribute(self, attr: sdk.VisualizationAttribute) -> str:
        """Return a column name derived from ``attr.label_id``."""
        return self._reserve(attr.label_id)

    def col_name_for_metric(self, metric: sdk.VisualizationMetric) -> str:
        """Return a column name derived from the first non-empty metric identifier.

        Candidates are consulted lazily in this order:

        1. ``item_id`` - for a simple measure this is the item identifier (nice, readable)
        2. ``alias``
        3. ``title``
        4. ``local_id`` - last resort (arbitrary, AD created local_ids are messy)
        """
        # TODO: improve this heuristic to get better names for derived measures
        source = metric.item_id
        if not source:
            source = metric.alias
        if not source:
            source = metric.title
        if not source:
            source = metric.local_id

        return self._reserve(source)
class CatalogNamingStrategy:
    """Interface of strategies that choose column names for catalog labels, facts and metrics.

    The signatures declared here are the ones `DefaultCatalogNamingStrategy`
    implements: labels and facts are named in the context of their dataset,
    metrics stand alone.
    """

    def col_name_for_label(self, label: sdk.CatalogLabel, dataset: sdk.CatalogDataset) -> str:
        """Return the column name for `label`, which belongs to `dataset`.

        :raises NotImplementedError: always; concrete strategies override this
        """
        raise NotImplementedError()

    def col_name_for_fact(self, fact: sdk.CatalogFact, dataset: sdk.CatalogDataset) -> str:
        """Return the column name for `fact`, which belongs to `dataset`.

        :raises NotImplementedError: always; concrete strategies override this
        """
        raise NotImplementedError()

    def col_name_for_metric(self, metric: sdk.CatalogMetric) -> str:
        """Return the column name for `metric`.

        :raises NotImplementedError: always; concrete strategies override this
        """
        raise NotImplementedError()


class DefaultCatalogNamingStrategy(CatalogNamingStrategy):
    """Names catalog columns after the sanitized identifiers of the catalog items.

    Labels, facts and metrics share one registry of used names per instance,
    so every returned column name is unique across all three kinds.
    """

    def __init__(self) -> None:
        # names handed out so far; only the keys matter
        self._uniques: dict[str, bool] = dict()

    def _col_name_for_id_without_prefix(self, item_id: str, dataset: sdk.CatalogDataset) -> str:
        """Return a unique column name for `item_id`, dropping a doubled dataset prefix.

        Some of our test projects follow the convention that a fact/label id
        looks like ``dataset.dataset_something``. That looks awkward in a
        table, so for ids starting with ``"<dataset.id>.<dataset.id>"`` the
        leading ``"<dataset.id>."`` is cut off before sanitizing. All other
        ids are used as they are.
        """
        ds_prefix = f"{dataset.id}."

        if item_id.startswith(f"{ds_prefix}{dataset.id}"):
            use_id = item_id[len(ds_prefix) :]
        else:
            use_id = item_id

        new_name = _sanitize_str_for_postgres(use_id, self._uniques)
        # reserve the name so that later calls cannot return it again
        self._uniques[new_name] = True

        return new_name

    def col_name_for_label(self, label: sdk.CatalogLabel, dataset: sdk.CatalogDataset) -> str:
        """Return a unique column name derived from ``label.id``."""
        return self._col_name_for_id_without_prefix(label.id, dataset)

    def col_name_for_fact(self, fact: sdk.CatalogFact, dataset: sdk.CatalogDataset) -> str:
        """Return a unique column name derived from ``fact.id``."""
        return self._col_name_for_id_without_prefix(fact.id, dataset)

    def col_name_for_metric(self, metric: sdk.CatalogMetric) -> str:
        """Return a unique column name derived from ``metric.id``."""
        new_name = _sanitize_str_for_postgres(metric.id, self._uniques)
        self._uniques[new_name] = True

        return new_name