Skip to content

Commit dc0c249

Browse files
committed
Implemented an option to write file overwrite diffs during a copy build step to a file, and warnings on identical file overwrites
Ticket: ENT-13116 Ticket: ENT-13117 Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com>
1 parent d520eef commit dc0c249

6 files changed

Lines changed: 155 additions & 8 deletions

File tree

cfbs/args.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class ArgsTypesNamespace(argparse.Namespace):
2727
git_user_email = None # type: Optional[str]
2828
git_commit_message = None # type: Optional[str]
2929
ignore_versions_json = False # type: bool
30+
diffs = None # type: Optional[str]
3031
omit_download = False # type: bool
3132
check_against_git = False # type: bool
3233
minimum_version = None # type: Optional[str]
@@ -161,6 +162,13 @@ def get_arg_parser(whitespace_for_manual=False):
161162
help="Ignore versions.json. Necessary in case of a custom index or testing changes to the default index.",
162163
action="store_true",
163164
)
165+
parser.add_argument(
166+
"--diffs",
167+
help="Write diffs of files overwritten with a copy build step during 'cfbs build' to the specified file",
168+
nargs="?",
169+
const="diffs.txt",
170+
default=None,
171+
)
164172
parser.add_argument(
165173
"--omit-download",
166174
help="Use existing masterfiles instead of downloading in 'cfbs generate-release-information'",

cfbs/build.py

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,16 @@
1717
from cfbs.utils import (
1818
canonify,
1919
cp,
20+
cp_dry_itemize,
2021
deduplicate_def_json,
22+
file_diff_text,
2123
find,
2224
merge_json,
2325
mkdir,
2426
pad_right,
2527
read_json,
2628
rm,
29+
save_file,
2730
sh,
2831
strip_left,
2932
touch,
@@ -129,8 +132,68 @@ def _perform_copy_step(args, source, destination, prefix):
129132
dst = ""
130133
print("%s copy '%s' 'masterfiles/%s'" % (prefix, src, dst))
131134
src, dst = os.path.join(source, src), os.path.join(destination, dst)
135+
136+
step_diffs_data = ""
137+
138+
itemization = cp_dry_itemize(src, dst)
139+
noop_overwrites_relpaths = []
140+
actual_overwrites_relpaths = []
141+
for item_string, file_relpath in itemization:
142+
if item_string[1] != "f":
143+
# only consider regular files
144+
continue
145+
if item_string[0] == "." or len(item_string) < 3:
146+
# the first character being a dot means that it's a no-op overwrite (possibly except file attributes)
147+
# explanation for the `< 3` comparison:
148+
# if all attributes are unchanged, the rsync item string will use spaces instead of dots and they will have been parsed away earlier
149+
noop_overwrites_relpaths.append(file_relpath)
150+
continue
151+
if item_string[2] == "+":
152+
# the copied file is new
153+
continue
154+
if item_string[2] == "c":
155+
# the copied file has a different checksum
156+
actual_overwrites_relpaths.append(file_relpath)
157+
continue
158+
elif item_string[2] == ".":
159+
# the copied regular file doesn't have a different checksum
160+
noop_overwrites_relpaths.append(file_relpath)
161+
log.debug("Novel item string: %s %s" % (item_string, file_relpath))
162+
for file_relpath in actual_overwrites_relpaths:
163+
if os.path.isfile(src):
164+
fileA = src
165+
else:
166+
fileA = os.path.join(src, file_relpath)
167+
if os.path.isfile(dst):
168+
fileB = dst
169+
else:
170+
fileB = os.path.join(dst, file_relpath)
171+
file_diff_data = file_diff_text(fileA, fileB)
172+
step_diffs_data += file_diff_data
173+
if len(noop_overwrites_relpaths) > 0:
174+
warning_message = (
175+
"Identical file overwrites occured during copy.\n"
176+
+ " Check your modules and their build steps to ascertain whether this is intentional.\n"
177+
+ " In most cases, the cause is a file from a latter module already being provided by an earlier module (commonly stock masterfiles).\n"
178+
+ " In that case, the file is best deleted from the latter module(s).\n"
179+
+ " Identical overwrites count: %s\n" % len(noop_overwrites_relpaths)
180+
)
181+
# display affected files, without flooding the output
182+
if len(noop_overwrites_relpaths) < 20:
183+
for overwrite_noop in noop_overwrites_relpaths:
184+
warning_message += " " + overwrite_noop + "\n"
185+
else:
186+
for overwrite_noop in noop_overwrites_relpaths[:9]:
187+
warning_message += " " + overwrite_noop + "\n"
188+
warning_message += " ...\n"
189+
for overwrite_noop in noop_overwrites_relpaths[-9:]:
190+
warning_message += " " + overwrite_noop + "\n"
191+
# display all the messages as one warning
192+
log.warning(warning_message)
132193
cp(src, dst)
133194

195+
return step_diffs_data
196+
134197

135198
def _perform_run_step(args, source, prefix):
136199
shell_command = " ".join(args)
@@ -316,7 +379,7 @@ def _perform_replace_version_step(module, i, args, name, destination, prefix):
316379
_perform_replacement(n, to_replace, version, filename)
317380

318381

319-
def perform_build(config: CFBSConfig) -> int:
382+
def perform_build(config: CFBSConfig, diffs_filename=None) -> int:
320383
if not config.get("build"):
321384
raise CFBSExitError("No 'build' key found in the configuration")
322385

@@ -349,6 +412,8 @@ def perform_build(config: CFBSConfig) -> int:
349412
% (step, expected, actual)
350413
)
351414

415+
diffs_data = ""
416+
352417
print("\nSteps:")
353418
max_length = config.longest_module_key_length("name")
354419
for module in config["build"]:
@@ -362,7 +427,8 @@ def perform_build(config: CFBSConfig) -> int:
362427
prefix = "%03d %s :" % (counter, pad_right(name, max_length))
363428

364429
if operation == "copy":
365-
_perform_copy_step(args, source, destination, prefix)
430+
step_diffs_data = _perform_copy_step(args, source, destination, prefix)
431+
diffs_data += step_diffs_data
366432
elif operation == "run":
367433
_perform_run_step(args, source, prefix)
368434
elif operation == "delete":
@@ -386,6 +452,14 @@ def perform_build(config: CFBSConfig) -> int:
386452
module, i, args, name, destination, prefix
387453
)
388454

455+
try:
456+
save_file(diffs_filename, diffs_data)
457+
except IsADirectoryError:
458+
print("")
459+
log.warning(
460+
"An existing directory was provided as the '--diffs' file path - writing the diffs file for the build failed - continuing build..."
461+
)
462+
389463
assert os.path.isdir("./out/masterfiles/")
390464
shutil.copyfile("./cfbs.json", "./out/masterfiles/cfbs.json")
391465
if os.path.isfile("out/masterfiles/def.json"):

cfbs/cfbs.1

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
.TH CFBS "1" "2025\-08\-14" "cfbs" "CFEngine Build System manual"
1+
.TH CFBS "1" "2025\-10\-03" "cfbs" "CFEngine Build System manual"
22
.SH NAME
33
cfbs \- combines multiple modules into 1 policy set to deploy on your infrastructure. Modules can be custom promise types, JSON files which enable certain functionality, or reusable CFEngine policy. The modules you use can be written by the CFEngine team, others in the community, your colleagues, or yourself.
44
.SH SYNOPSIS
55
.B cfbs
6-
[-h] [--loglevel LOGLEVEL] [-M] [--version] [--force] [--non-interactive] [--index INDEX] [--check] [--checksum CHECKSUM] [--keep-order] [--git {yes,no}] [--git-user-name GIT_USER_NAME] [--git-user-email GIT_USER_EMAIL] [--git-commit-message GIT_COMMIT_MESSAGE] [--ignore-versions-json] [--omit-download] [--check-against-git] [--from MINIMUM_VERSION] [--to-json [TO_JSON]] [--reference-version REFERENCE_VERSION] [--masterfiles-dir MASTERFILES_DIR] [--ignored-path-components [IGNORED_PATH_COMPONENTS ...]] [--offline] [--masterfiles MASTERFILES] [cmd] [args ...]
6+
[-h] [--loglevel LOGLEVEL] [-M] [--version] [--force] [--non-interactive] [--index INDEX] [--check] [--checksum CHECKSUM] [--keep-order] [--git {yes,no}] [--git-user-name GIT_USER_NAME] [--git-user-email GIT_USER_EMAIL] [--git-commit-message GIT_COMMIT_MESSAGE] [--ignore-versions-json] [--diffs [DIFFS]] [--omit-download] [--check-against-git] [--from MINIMUM_VERSION] [--to-json [TO_JSON]] [--reference-version REFERENCE_VERSION] [--masterfiles-dir MASTERFILES_DIR] [--ignored-path-components [IGNORED_PATH_COMPONENTS ...]] [--offline] [--masterfiles MASTERFILES] [cmd] [args ...]
77
.SH DESCRIPTION
88
CFEngine Build System.
99

@@ -72,6 +72,10 @@ Specify git commit message
7272
\fB\-\-ignore\-versions\-json\fR
7373
Ignore versions.json. Necessary in case of a custom index or testing changes to the default index.
7474

75+
.TP
76+
\fB\-\-diffs\fR \fI\,[DIFFS]\/\fR
77+
Write diffs of files overwritten with a copy build step during 'cfbs build' to the specified file
78+
7579
.TP
7680
\fB\-\-omit\-download\fR
7781
Use existing masterfiles instead of downloading in 'cfbs generate\-release\-information'

cfbs/commands.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -902,7 +902,7 @@ def download_command(force, ignore_versions=False):
902902

903903

904904
@cfbs_command("build")
905-
def build_command(ignore_versions=False):
905+
def build_command(ignore_versions=False, diffs_filename=None):
906906
config = CFBSConfig.get_instance()
907907
r = validate_config(config)
908908
if r != 0:
@@ -915,7 +915,7 @@ def build_command(ignore_versions=False):
915915
# so we try building anyway and don't return error(s)
916916
init_out_folder()
917917
_download_dependencies(config, ignore_versions=ignore_versions)
918-
r = perform_build(config)
918+
r = perform_build(config, diffs_filename)
919919
return r
920920

921921

cfbs/main.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,11 @@ def _main() -> int:
138138
% args.command
139139
)
140140

141+
if args.diffs and args.command != "build":
142+
raise CFBSUserError(
143+
"The option --diffs is only for 'cfbs build', not 'cfbs %s'" % args.command
144+
)
145+
141146
if args.non_interactive and args.command not in (
142147
"init",
143148
"add",
@@ -213,7 +218,9 @@ def _main() -> int:
213218
if args.command == "download":
214219
return commands.download_command(args.force)
215220
if args.command == "build":
216-
return commands.build_command(ignore_versions=args.ignore_versions_json)
221+
return commands.build_command(
222+
ignore_versions=args.ignore_versions_json, diffs_filename=args.diffs
223+
)
217224
if args.command == "install":
218225
return commands.install_command(args.args)
219226
if args.command == "update":

cfbs/utils.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import difflib
2+
import logging as log
13
import os
24
import re
35
import sys
@@ -10,7 +12,7 @@
1012
import urllib.error
1113
from collections import OrderedDict
1214
from shutil import rmtree
13-
from typing import Iterable, Union
15+
from typing import Iterable, List, Tuple, Union
1416

1517
from cfbs.pretty import pretty
1618

@@ -111,6 +113,24 @@ def display_diff(path_A, path_B):
111113
raise
112114

113115

116+
def file_diff(filepath_A, filepath_B):
117+
with open(filepath_A) as f:
118+
lines_A = f.readlines()
119+
with open(filepath_B) as f:
120+
lines_B = f.readlines()
121+
122+
u_diff = difflib.unified_diff(lines_A, lines_B, filepath_A, filepath_B)
123+
124+
return u_diff
125+
126+
127+
def file_diff_text(filepath_A, filepath_B):
128+
u_diff = file_diff(filepath_A, filepath_B)
129+
diff_text = "".join(u_diff)
130+
131+
return diff_text
132+
133+
114134
def mkdir(path: str, exist_ok=True):
115135
os.makedirs(path, exist_ok=exist_ok)
116136

@@ -147,6 +167,40 @@ def cp(src, dst):
147167
sh("rsync -r %s/ %s" % (src, dst))
148168

149169

170+
def cp_dry_itemize(src: str, dst: str) -> List[Tuple[str, str]]:
171+
above = os.path.dirname(dst)
172+
if not os.path.exists(above):
173+
mkdir(above)
174+
if dst.endswith("/") and not os.path.exists(dst):
175+
mkdir(dst)
176+
if os.path.isfile(src):
177+
result = subprocess.run(
178+
"rsync -rniic %s %s" % (src, dst),
179+
shell=True,
180+
check=True,
181+
stdout=subprocess.PIPE,
182+
)
183+
else:
184+
result = subprocess.run(
185+
"rsync -rniic %s/ %s" % (src, dst),
186+
shell=True,
187+
check=True,
188+
stdout=subprocess.PIPE,
189+
)
190+
lines = result.stdout.decode("utf-8").split("\n")
191+
itemization = []
192+
for line in lines:
193+
terms = line.split()
194+
if len(terms) == 2:
195+
item_string = terms[0]
196+
file_relpath = terms[1]
197+
itemization.append((item_string, file_relpath))
198+
elif len(terms) > 0:
199+
log.debug("Abnormal rsync output line: '%s'" % terms)
200+
201+
return itemization
202+
203+
150204
def pad_left(s, n):
151205
return s.rjust(n)
152206

0 commit comments

Comments
 (0)