Skip to content

Commit e649567

Browse files
committed
add docker-helper.sh to lift examples using pac-nix docker image
this introduces the src/test/make/docker-helper.sh script which is a (hopefully) developer-friendly method for lifting examples in a Docker container provided by [pac-nix][]. to ensure reproducibility, this uses an x86_64-linux docker container with cross-compiler provided by nix. for more details, also see #288. the usage is comprehensively documented in [nix-docker-build readme]. the general flow is: 1. activate the docker-helper.sh environment: `eval $(docker-helper.sh env)`. (you will need to re-run this if you close your terminal) 2. pull the docker image: `docker-helper.sh pull` 3. start the docker container: `docker-helper.sh start` 4. (optional) clean the directory you want to lift: `make SUBDIRS=extraspec_incorrect/malloc_memcpy_strlen_memset_free clean` 5. compile and lift the directory: `make SUBDIRS=extraspec_incorrect/malloc_memcpy_strlen_memset_free` 6. commit the results (eventually, we want to move the binaries into another repo. but until then, committing is fine.) [pac-nix]: https://github.com/katrinafyi/pac-nix/ [nix-docker-build readme]: https://github.com/UQ-PAC/BASIL/blob/nix-docker-build/src/test/readme.md
1 parent 28d4cfb commit e649567

10 files changed

Lines changed: 482 additions & 41 deletions

File tree

.gitattributes

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
11
*.mill linguist-language=Scala
22
mill linguist-generated
33
mill.bat linguist-generated
4+
5+
src/test/compiled.url.txt text eol=lf
6+
src/test/make/docker-flake.txt text eol=lf
7+
8+
Makefile text eol=lf
9+
*.sh text eol=lf
10+
*.mk text eol=lf
11+
*.md5sum text eol=lf linguist-generated

src/test/Makefile

Lines changed: 56 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,60 @@
1-
TARGETSS := all verify clean cleanall cleanlift recompile json cleanjson cleangts gts
1+
TARGETSS := all verify repro-stash repro-check md5sum-check md5sum-update clean cleanall cleanlift recompile json cleanjson cleangts gts
22

3-
SUBTARGETS = $(wildcard correct/*/ incorrect/*/)
4-
.PHONY : $(TARGETSS) $(SUBTARGETS) correct incorrect
3+
# subdirectories of src/test. to be entered into by this makefile.
4+
DIRS := correct incorrect extraspec_correct extraspec_incorrect indirect_calls memory_regions procedure_summaries
5+
# dirs with different directory structure: dsa, irreducible_loops
56

6-
$(TARGETSS): $(SUBTARGETS)
7+
# non-test dirs: make, scala, unimplemented
78

8-
correct: $(realpath $(wildcard correct/*))
9-
incorrect: $(realpath $(wildcard incorrect/*))
9+
# in case the user specifies DIRS, make sure all dirs exist.
10+
$(foreach d, $(DIRS), \
11+
$(if $(wildcard $(d)/.), \
12+
, \
13+
$(error user error: directory "$(d)" in DIRS variable does not exist)))
14+
15+
SUBDIRS = $(wildcard $(addsuffix /*/,$(DIRS)))
16+
.PHONY : $(TARGETSS) $(SUBDIRS) $(DIRS)
17+
18+
# through some unpleasantness, this lets the user specify either DIRS or SUBDIRS
19+
# on the command line, and the make operation will be narrowed to that directory
20+
$(TARGETSS): $(SUBDIRS)
21+
22+
$(SUBDIRS):
23+
$(MAKE) -C $@ -f $(realpath ./make/lift-directories.mk) $(MAKECMDGOALS)
24+
25+
# concats md5sums files in subdirectories into a compiled.md5sum.
26+
# check with `md5sum -c compiled.md5sum` in src/test.
27+
.PHONY: compiled.md5sum
28+
compiled.md5sum:
29+
find $(DIRS) -name '*.md5sum' -exec cat '{}' + | sort -k2 > compiled.md5sum
30+
31+
TARBALL := compiled.tar.zst
32+
33+
# packs every file listed in compiled.md5sum into $(TARBALL) and records the
# docker image contents in docker-contents.txt. both files are produced by one
# run of this recipe (grouped target `&:`, which needs GNU Make >= 4.3).
# requires DOCKER_CMD in the environment; `set -u` aborts the line if it is unset.
$(TARBALL) docker-contents.txt &: compiled.md5sum
	set -u; $$DOCKER_CMD hash > docker-contents.txt # before compressing, make sure docker-contents.txt is up to date.
	md5sum --quiet -c compiled.md5sum # before compressing, make sure our files match expected hashes.
	# the file list lives in a temp file; the EXIT trap removes it even when cut or tar fails.
	list=`mktemp` && trap 'rm -f "$$list"' EXIT && cut -d' ' -f3 compiled.md5sum > "$$list" && tar caf $(TARBALL) -T "$$list"
	sha1sum $(TARBALL)
38+
39+
.PHONY: extract
40+
extract:
41+
# log URL and expected hash
42+
{ head -n1 compiled.url.txt; tail -n1 compiled.url.txt; } | cat -v
43+
# check existing file, otherwise download fresh copy.
44+
{ tail -n1 compiled.url.txt | sha1sum -c - ; } \
45+
|| curl "$$(head -n1 compiled.url.txt)" -o $(TARBALL)
46+
# check file type.
47+
-file $(TARBALL)
48+
# validate the hash, otherwise remove the incorrect file and abort.
49+
{ tail -n1 compiled.url.txt | sha1sum -c - ; } || { rm -v $(TARBALL); exit 1; }
50+
tar xf $(TARBALL) --keep-old-files --touch
51+
md5sum --quiet -c compiled.md5sum # check that extracted files match expected checksums
52+
53+
.PHONY: push
54+
push:
55+
tmp=`mktemp -d` && \
56+
git clone git@github.com:UQ-PAC/basil-tests.git $$tmp --single-branch --branch basil-src-test && \
57+
cd $$tmp && \
58+
59+
rm -rf $$tmp
1060

11-
$(SUBTARGETS):
12-
-$(MAKE) -C $@ -f $(realpath ./make/lift-directories.mk) $(MAKECMDGOALS)

src/test/make/bap-normalise.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#!/usr/bin/env python3
2+
# vim: ts=2 sts=2 et sw=2
3+
4+
"""
5+
BAP .adt / .bir file normaliser:
6+
7+
usage:
8+
bap-normalise.py ADT-FILE BIR-FILE
9+
10+
both arguments are required in that order!
11+
files will be modified in-place.
12+
13+
+00000540: main_argv :: in out u64 = R1
14+
+00000541: main_result :: out u32 = low:32[R0]
15+
16+
00000199:
17+
0000019c: R1 := 0x41F000
18+
@@ -187,7 +187,7 @@
19+
000001c8: call R30 with noreturn
20+
21+
00000515: sub register_tm_clones(register_tm_clones_result)
22+
-00000529: register_tm_clones_result :: out u32 = low:32[R0]
23+
+00000542: register_tm_clones_result :: out u32 = low:32[R0]
24+
25+
0000028e:
26+
00000291: R0 := 0x420000
27+
@@ -199,13 +199,13 @@
28+
000002b6: R1 := R2 + (R1 ~>> 3)
29+
000002bc: R1 := extend:64[63:1[R1]]
30+
000002c2: when R1 = 0 goto %000002c0
31+
-00000516: goto %00000337
32+
+0000052f: goto %00000337
33+
34+
00000337:
35+
0000033a: R2 := 0x41F000
36+
00000341: R2 := mem[R2 + 0xFF8, el]:u64
37+
00000346: when R2 = 0 goto %000002c0
38+
-00000517: goto %0000034a
39+
+00000530: goto %0000034a
40+
41+
"""
42+
43+
import sys
44+
import re
45+
46+
# validate the argument count *before* indexing sys.argv, so that a wrong
# invocation reports the expected usage instead of an opaque IndexError.
assert len(sys.argv) == 3, 'usage: bap-normalise.py ADT-FILE BIR-FILE'

adt_file = sys.argv[1]  # path of the .adt file, rewritten in-place
bir_file = sys.argv[2]  # path of the .bir file, rewritten in-place
49+
50+
string_re = re.compile(rb'''"((?:[^"\\]|\\.)*)"''')
51+
hexstring_re = re.compile(rb'''"%([\da-fA-F]{8})"''')
52+
tid_re = re.compile(rb'''Tid\(([_\d]+),''')
53+
bir_re = re.compile(rb'''(?:^([\da-fA-F]{8}):)|(?: %([\da-fA-F]{8}))''', re.MULTILINE)
54+
55+
with open(adt_file, 'rb') as f:
56+
adt = f.read()
57+
58+
tids: dict[int, int] = {} # map of old tid to their first position in adt
59+
for match in re.finditer(tid_re, adt):
60+
tid = int(match[1].replace(b'_', b''))
61+
if tid not in tids:
62+
tids[tid] = match.start()
63+
64+
assert tids, f'adt file {adt_file} has no Tid() values??'
65+
66+
keys = list(tids.keys())
67+
keys.sort(key=tids.__getitem__)
68+
69+
new_tids = {tid: 4*i for i, tid in enumerate(keys)}
70+
71+
# .adt file
72+
73+
def sub_adt(m: re.Match[bytes]) -> bytes:
  """Replacement callback for `tid_re`: emit `Tid(N,` with the renumbered tid.

  Group 1 of the match holds the old decimal tid, possibly with `_` digit
  separators. The new value is looked up in `new_tids` and written back with
  `_` separators.
  """
  digits = m[1].replace(b'_', b'')
  renumbered = new_tids[int(digits)]
  return ('Tid(' + format(renumbered, '_') + ',').encode('ascii')
77+
def sub_adt_strings(m: re.Match[bytes]) -> bytes:
  """Replacement callback for `hexstring_re`: rewrite a quoted `"%XXXXXXXX"` tid.

  Group 1 of the match holds the old tid as 8 hex digits. The new value is
  looked up in `new_tids` and emitted as 8 lowercase hex digits, keeping the
  surrounding quotes and `%` prefix.
  """
  renumbered = new_tids[int(m[1], 16)]
  return b'"%' + format(renumbered, '08x').encode('ascii') + b'"'
81+
82+
new_adt = re.sub(tid_re, sub_adt, adt)
83+
new_adt = re.sub(hexstring_re, sub_adt_strings, new_adt)
84+
85+
# .bir file
86+
87+
# print(new_tids)
88+
bir_seen = set()
89+
def sub_bir(m: re.Match[bytes]) -> bytes:
  """Replacement callback for `bir_re`: renumber one tid in the .bir text.

  The old tid is in group 1 (a line-leading `XXXXXXXX:` label) or group 2
  (a ` %XXXXXXXX` reference). It is recorded in `bir_seen`, must be present
  in `new_tids`, and is swapped for the new 8-digit hex value inside the
  matched text.
  """
  old_hex = m[1] if m[1] else m[2]
  old_tid = int(old_hex, 16)
  bir_seen.add(old_tid)
  assert old_tid in new_tids, f"{m}"
  replacement = format(new_tids[old_tid], '08x').encode('ascii')
  return m[0].replace(old_hex, replacement)
96+
97+
with open(bir_file, 'rb') as f:
98+
bir = f.read()
99+
100+
new_bir = re.sub(bir_re, sub_bir, bir)
101+
adt_seen = set(new_tids)
102+
assert bir_seen == adt_seen, f'not equal!\nbir - adt =\n{bir_seen - adt_seen}\nadt - bir =\n{adt_seen - bir_seen}'
103+
104+
assert new_adt != adt
105+
assert new_bir != bir
106+
107+
# writeback only if both are successful
108+
109+
with open(bir_file, 'wb') as f:
110+
f.write(new_bir)
111+
112+
with open(adt_file, 'wb') as f:
113+
f.write(new_adt)

src/test/make/docker-flake.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
github:katrinafyi/pac-nix/569afdf78558de82c24d25e12680157c3b0aa3df#basil-tools-docker

src/test/make/docker-helper.sh

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
#!/usr/bin/env bash
2+
set -ue
3+
4+
if [[ -z "${GIT_ROOT:-}" ]] && command -v git &>/dev/null; then
5+
GIT_ROOT=$(git rev-parse --show-toplevel)
6+
fi
7+
: ${GIT_ROOT}
8+
DIR=$(realpath --relative-to "$GIT_ROOT" .)
9+
10+
: ${DOCKER:=podman}
11+
: ${DOCKER_PLATFORM:=--platform linux\/amd64}
12+
: ${DOCKER_USER:=root}
13+
: ${DOCKER_IMAGE:=ghcr.io/uq-pac/basil-tools-docker}
14+
15+
# print usage when called without arguments or with --help.
# the negated test sets the exit status used by the bare `exit`:
# non-zero when no arguments were given, zero for an explicit --help.
if [[ $# -lt 1 ]] || [[ "$1" == --help ]]; then
  echo "usage: $(basename "$0") (pull | push | build | start | stop | shell | hash | env [--unset | --reset] | COMMAND...)"
  ! [[ $# -lt 1 ]]
  exit
fi
20+
21+
# absolute path of this script; quoted so that paths containing spaces survive.
DOCKER_CMD="$(realpath "$0")"

# unless DOCKER_FLAKE is already set (and non-empty), read the flake reference
# from the docker-flake.txt file that sits next to this script.
flake_file="$(dirname "$DOCKER_CMD")/docker-flake.txt"
if [[ -z "${DOCKER_FLAKE:-}" ]] && [[ -r "$flake_file" ]]; then
  DOCKER_FLAKE="$(cat "$flake_file")"
fi

# abort with an explanatory message if DOCKER_FLAKE is still unset.
: "${DOCKER_FLAKE?DOCKER_FLAKE is not set and $flake_file is not readable}"
28+
29+
# create unique names depending on the flake reference, to ensure the correct container
30+
# is used.
31+
# unique names depend only on DOCKER_FLAKE, allowing them to be computed without nix.
32+
commit=$(printf '%s' "$DOCKER_FLAKE" | grep --only-matching -E '[0-9a-fA-F]{40}' | head -c8)
33+
flake_hash=flake-$(printf '%s' "$DOCKER_FLAKE" | md5sum | cut -d' ' -f1 | head -c4)
34+
35+
if [[ -z "${DOCKER_TAG:-}" ]]; then
36+
DOCKER_TAG="$flake_hash-$commit"
37+
fi
38+
39+
unique_image="$DOCKER_IMAGE:$DOCKER_TAG"
40+
unique_container="container-$DOCKER_TAG"
41+
42+
# this allows the env subcommand to output syntax compatible with multiple shells.
# SHELL may be unset (which would abort the script under `set -u`), so fall back
# to `sh`, which selects the POSIX-style syntax below.
shell=$(basename "${SHELL:-sh}")
if [[ $shell == fish ]]; then
  unset='set --erase'
  unalias='functions --erase'
  eval='('
else
  unset=unset
  unalias=unalias
  eval='$('
fi
53+
54+
55+
if [[ "$1" == pull ]]; then
56+
# pulls the unique image from the registry
57+
set -x
58+
exec $DOCKER pull $DOCKER_PLATFORM "$unique_image"
59+
60+
elif [[ "$1" == push ]]; then
61+
# pushes the unique image to the registry. image must already exist locally.
62+
set -x
63+
exec $DOCKER push "$unique_image"
64+
65+
elif [[ "$1" == build ]]; then
66+
# builds the docker image for running tools.
67+
# safe to re-run. if docker image is already up-to-date, should be reasonably fast.
68+
nix build "$DOCKER_FLAKE" --no-link
69+
nix build "$DOCKER_FLAKE.conf" --no-link
70+
conf=$(nix build "$DOCKER_FLAKE.conf" --no-link --print-out-paths)
71+
tag=$(nix eval --expr "with builtins; (fromJSON (unsafeDiscardStringContext (readFile $conf))).repo_tag" --impure --raw)
72+
if ! [[ "$tag" == "$DOCKER_IMAGE":* ]]; then
73+
printf '%s %s %s.\n' \
74+
"ERROR: docker image names do not match!" \
75+
"nix flake will build '$tag', but" \
76+
"DOCKER_IMAGE is '$DOCKER_IMAGE'" >&2
77+
exit 1
78+
fi
79+
set -x
80+
$(nix build "$DOCKER_FLAKE" --no-link --print-out-paths) | "$DOCKER" image load
81+
$DOCKER image tag "$tag" $unique_image
82+
exit
83+
84+
elif [[ "$1" == start ]]; then
85+
# starts an instance of the docker image.
86+
set -x
87+
exec $DOCKER run $DOCKER_PLATFORM -v"$GIT_ROOT:$GIT_ROOT" --rm -td --user $DOCKER_USER --name $unique_container $unique_image
88+
89+
elif [[ "$1" == stop ]]; then
90+
# stops the instance of the docker image.
91+
set -x
92+
exec $DOCKER stop -t 1 $unique_container
93+
# since --rm is given to `docker run`, this will also remove the container.
94+
95+
elif [[ "$1" == shell ]]; then
96+
# enters an interactive shell within the container.
97+
set -x
98+
exec $DOCKER exec -it --user $DOCKER_USER -w "$GIT_ROOT/$DIR" -eshell=1 $unique_container /usr/bin/_exec bash
99+
100+
elif [[ "$1" == hash ]]; then
101+
# outputs information about the docker image's version to stdout.
102+
echo "$DOCKER_FLAKE"
103+
echo
104+
exec "$DOCKER_CMD" bash -c 'ls -1 /nix/store | sort -k1.33' # sort /nix/store contents by name, not hash
105+
106+
elif [[ "$1" == env ]]; then
107+
# outputs commands to set the environment to stdout.
108+
# when passed to `eval`, these commands should prepare the shell for running
109+
# basil tests through docker.
110+
111+
# if --unset is used, removes all definitions
112+
isunset=$([[ $# -ge 2 ]] && [[ "$2" == --unset ]] && echo true || echo false)
113+
# if --reset is used, removes all definitions, then re-adds them based on defaults
114+
isreset=$([[ $# -ge 2 ]] && [[ "$2" == --reset ]] && echo true || echo false)
115+
116+
if $isreset; then
117+
isunset=true
118+
fi
119+
120+
function echoexport() {
121+
if $isunset; then
122+
echo echo "$unset" "$1" ';'
123+
echo "$unset" "$1" ';'
124+
return
125+
fi
126+
printf 'echo "%s = %s";\n' "$1" "$2"
127+
printf 'export %s="%s";\n' "$1" "$2"
128+
}
129+
130+
echoexport USE_DOCKER "1"
131+
echoexport DOCKER_FLAKE "$DOCKER_FLAKE"
132+
echoexport DOCKER_IMAGE "$DOCKER_IMAGE"
133+
echoexport DOCKER_TAG "$DOCKER_TAG"
134+
echoexport DOCKER_PLATFORM "$DOCKER_PLATFORM"
135+
echoexport DOCKER "$DOCKER"
136+
echoexport DOCKER_USER "$DOCKER_USER"
137+
echoexport DOCKER_CMD "$DOCKER_CMD"
138+
echoexport GIT_ROOT "$GIT_ROOT"
139+
echo 'echo;'
140+
echoexport GCC "$DOCKER_CMD aarch64-unknown-linux-gnu-gcc"
141+
echoexport CLANG "$DOCKER_CMD aarch64-unknown-linux-gnu-clang"
142+
echoexport READELF "$DOCKER_CMD aarch64-unknown-linux-gnu-readelf"
143+
echoexport BAP "$DOCKER_CMD bap"
144+
echoexport DDISASM "$DOCKER_CMD ddisasm"
145+
echoexport PROTO_JSON "$DOCKER_CMD proto-json.py"
146+
# echoexport PROTO_JSON "/home/rina/progs/gtirb-semantics/scripts/proto-json.py"
147+
echoexport DEBUG_GTS "$DOCKER_CMD debug-gts.py"
148+
echoexport GTIRB_SEMANTICS "$DOCKER_CMD gtirb-semantics"
149+
echo 'echo;'
150+
if $isunset; then
151+
echo "echo $unalias docker-helper.sh;"
152+
echo "$unalias docker-helper.sh;"
153+
else
154+
echo "echo alias docker-helper.sh = '$DOCKER_CMD';"
155+
echo "alias 'docker-helper.sh=$DOCKER_CMD';"
156+
fi
157+
158+
if $isreset; then
159+
echo "eval $eval$DOCKER_CMD env);"
160+
fi
161+
exit
162+
fi
163+
164+
if [[ -n "${NIX_BUILD_TOP:-}" ]]; then
165+
set -x
166+
# if already inside a Nix shell, simply execute
167+
exec /usr/bin/_exec "$@"
168+
else
169+
set -x
170+
# for other commands, execute within the container.
171+
exec $DOCKER exec --user $DOCKER_USER -w "$GIT_ROOT/$DIR" $unique_container /usr/bin/_exec "$@"
172+
fi

src/test/make/gcc.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
CC=$(GCC)
2+
CFLAGS += -pie
23
include $(GIT_ROOT)/src/test/make/lift.mk

src/test/make/gcc_O2.mk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
CC=$(GCC)
2-
CFLAGS += -O2
2+
CFLAGS += -pie -O2
33
include $(GIT_ROOT)/src/test/make/lift.mk

src/test/make/gcc_pic.mk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
CC=$(GCC)
2-
CFLAGS += -fpic
2+
CFLAGS += -pie -fpic
33
include $(GIT_ROOT)/src/test/make/lift.mk

0 commit comments

Comments
 (0)