-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdb_import_check.py
More file actions
191 lines (161 loc) · 6.41 KB
/
db_import_check.py
File metadata and controls
191 lines (161 loc) · 6.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!/usr/bin/env powerscript --run-level=3 --nologin
# -*- mode: python; coding: utf-8 -*-
# pylint: disable=R0801
# During cdbimp, entries are created in multiple tables.
# The checks that db_import.py performs at runtime are therefore not complete,
# because a table that was already imported and checked could still be erroneously extended later.
#
# Therefore we generate another diff after the migration is complete
# and before the CE services are started:
#
# 0. After db_import.py finished do not start the CE services.
# 1. Copy initial dump files to $CADDOK_BASE/c_cdbexp.
# 2. Copy this script to the /home/contact folder of the pod.
# 3. Start the script in the pod:
# powerscript --run-level=3 db_import_check.py
# 4. The script results are stored in RESULT_HTML_PATH, the logs in LOG_PATH.
#    The report shows new (unexpected) records in the DB in green and removed (unexpected) records in red.
import difflib
import html
import logging
import os
import re
import subprocess # nosec B404

from cdb import sqlapi
# Matches a line that consists only of "C" followed by digits (the row-count
# line of a cdbexp file, see count_expected_rows) and captures the digits.
EXPFILE_COUNT_LINE_RE = re.compile(r"^C(\d+)$")
# Path of the HTML report written by main().
# NOTE(review): os.environ.get returns None when CADDOK_BASE is not set, in
# which case os.path.join raises a TypeError while this module is loaded.
RESULT_HTML_PATH = os.path.join(
    os.environ.get("CADDOK_BASE"), "db_import", "db_check.html"
)
# Path of the log file the module logger writes to.
LOG_PATH = os.path.join(
    os.environ.get("CADDOK_BASE"), "db_import", "db_import_check.log"
)
# Tables that main() skips entirely (no row count, no diff).
IGNORE_TABLES = [
    "lstatistics",
    "ldbserver",
    "lmnames",
    "lmodules",
    "lusage",
]
# Module logger, named after this script file, writing to LOG_PATH only.
logger = logging.getLogger(os.path.basename(__file__))
formatter = logging.Formatter("[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s")
# NOTE(review): the file handler is created at import time, so the directory
# of LOG_PATH presumably has to exist before the script starts - confirm.
handler = logging.FileHandler(LOG_PATH, encoding="utf-8")
handler.setFormatter(formatter)
logger.addHandler(handler)
# Level INFO: logger.debug(...) calls in this file are not written to the log.
logger.setLevel(logging.INFO)
def subprocess_run(args, stdin=None):
    """
    Execute a command with subprocess.run and log what it printed.

    STDOUT and STDERR are captured separately and decoded as utf-8.
    On success STDOUT is logged at INFO level and a non-empty STDERR at
    ERROR level. If the process fails (rc != 0), the captured output is
    logged at ERROR level and the subprocess.CalledProcessError is re-raised.

    :param args: command to be executed
    :type args: List
    :param stdin: optional text passed to the process on its standard input;
        it is logged at INFO level when given
    :return: process object handle
    :raises subprocess.CalledProcessError: if the command exits with rc != 0
    """
    logger.debug("subprocess_run(%s)", subprocess.list2cmdline(args))
    if stdin is not None:
        logger.info("calling with stdin:\n%s", stdin)

    try:
        completed = subprocess.run(  # nosec subprocess_without_shell_equals_true
            args, check=True, input=stdin, encoding="utf-8", capture_output=True
        )
    except subprocess.CalledProcessError as failure:
        # Record whatever the failed command printed before passing the error on.
        for captured in (failure.stdout, failure.stderr):
            if captured:
                logger.error(captured)
        raise
    else:
        logger.info("subprocess_run STDOUT: %s", completed.stdout)
        if completed.stderr:
            logger.error("subprocess_run STDERR: %s", completed.stderr)

    return completed
def get_diff_in_html(file1, file2) -> str:
    """
    Build an HTML fragment showing the line differences between two text files.

    Both files are read as utf8 and their lines are sorted before the
    comparison, so only differences in content (not in line order) show up.
    Removed lines are rendered in red, added lines in green and hunk headers
    in blue. All text taken from the files or the file names is HTML-escaped,
    so records containing "<", ">" or "&" are displayed literally instead of
    being interpreted as markup.

    :param file1: path of the reference file; its base name is used as heading
    :param file2: path of the file compared against the reference
    :return: HTML fragment (heading, <pre> block with the diff, <hr>)
    """
    with open(file1, encoding="utf8") as f1, open(file2, encoding="utf8") as f2:
        f1_lines = sorted(f1.readlines())
        f2_lines = sorted(f2.readlines())

    # Unified diff without context lines (n=0): only changed lines are listed.
    diff = difflib.unified_diff(
        f1_lines, f2_lines, fromfile=file1, tofile=file2, n=0
    )

    # Collect the pieces and join once instead of repeated string concatenation.
    parts = [f"<h2>Difference for {html.escape(os.path.basename(file1))}</h2><pre>"]
    for line in diff:
        # Classify on the raw line, but only ever emit the escaped text.
        text = html.escape(line)
        if line.startswith("-"):
            parts.append(f'<span style="color: red;">{text}</span>')  # Deleted line in red
        elif line.startswith("+"):
            parts.append(f'<span style="color: green;">{text}</span>')  # Added line in green
        elif line.startswith("@@"):
            parts.append(f'<span style="color: blue;">{text}</span>')  # Hunk header in blue
        else:
            parts.append(text)  # Anything else in default color
    parts.append("</pre><hr>\n")
    return "".join(parts)
def export_table_to_file(table_name: str) -> str:
    """
    Export one (suspicious) table with cdbexp so it can be compared with its original dump.

    Writes the file ./tables.cf in the current working directory with the
    single line "* FROM <table_name>", hands it to cdbexp via "-c" and lets
    cdbexp write to $CADDOK_BASE/c_cdbexp/<table_name>.exp.latest.
    The STDOUT of cdbexp is logged.

    :param table_name: name of the table to export
    :return: path of the export file
    :raises subprocess.CalledProcessError: if cdbexp exits with rc != 0
    """
    control_file = "./tables.cf"
    target_path = os.path.join(
        os.environ.get("CADDOK_BASE"), "c_cdbexp", f"{table_name}.exp.latest"
    )

    # The control file is rewritten (mode "w") for every exported table.
    with open(control_file, "w", encoding="utf8") as control:
        control.write(f"* FROM {table_name}\n")

    command = ["cdbexp", "-y", "--unsorted", "-c", control_file, "-o", target_path]
    completed = subprocess_run(command)
    logger.info(completed.stdout)
    return target_path
def count_expected_rows(exp_file_path: str) -> int:
    """
    Read the row count stored in a cdbexp file.

    Each cdbexp file contains a line that counts the lines in the file,
    formatted like "C1234". The first line of that form is used and its
    number is returned as the amount of rows that should have been imported
    into the database.

    :param exp_file_path: path of the cdbexp file (read as utf-8)
    :return: the number found in the first count line
    :raises RuntimeError: if the file contains no count line
    """
    with open(exp_file_path, "r", encoding="utf-8") as exp_file:
        candidates = (EXPFILE_COUNT_LINE_RE.match(line) for line in exp_file)
        # Stops reading as soon as the first count line has been found.
        first_hit = next((hit for hit in candidates if hit), None)

    if first_hit is None:
        raise RuntimeError(f"Did not find count of lines in cdbexp file {exp_file_path}")
    return int(first_hit.group(1))
def main():
    """
    Compare the row count of every table with its original dump and write an HTML report.

    Iterates over the table names read from cdb_tables (type='T', ordered by
    name) and skips the ones listed in IGNORE_TABLES. For each remaining table
    the number of rows in the database is compared with the count stored in
    $CADDOK_BASE/c_cdbexp/<table>.exp. Only when the two numbers differ is the
    table exported again and the diff between the original and the fresh dump
    appended to RESULT_HTML_PATH.
    """
    with open(RESULT_HTML_PATH, "w", encoding="utf8") as report:
        report.write("<html><body><h1>Database Table Differences</h1>")

        table_records = sqlapi.RecordSet2(
            "cdb_tables", "type='T'", ("table_name",), "order by table_name"
        )
        for record in table_records:
            name = record["table_name"]
            if name in IGNORE_TABLES:
                logger.info("Skipping table %s", name)
                continue

            reference_dump = os.path.join(
                os.environ.get("CADDOK_BASE"), "c_cdbexp", f"{name}.exp"
            )
            count_result = sqlapi.SQLselect(f"COUNT(*) FROM {name}")
            found = int(sqlapi.SQLnumber(count_result, 0, 0))
            wanted = count_expected_rows(reference_dump)

            # Matching counts: the table is not examined any further.
            if found == wanted:
                continue

            logger.info(
                "Expected %s lines in %s but found %s.",
                wanted,
                name,
                found,
            )
            fresh_dump = export_table_to_file(name)
            report.write(get_diff_in_html(reference_dump, fresh_dump))

        # Close the HTML content
        report.write("</body></html>")
    logger.info("Done.")


if __name__ == "__main__":
    main()