forked from static-analysis-engineering/CodeHawk-Binary
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcommandutil.py
More file actions
2663 lines (2202 loc) · 82.8 KB
/
commandutil.py
File metadata and controls
2663 lines (2202 loc) · 82.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
# ------------------------------------------------------------------------------
# CodeHawk Binary Analyzer
# Author: Henny Sipma
# ------------------------------------------------------------------------------
# The MIT License (MIT)
#
# Copyright (c) 2021-2025 Aarno Labs, LLC
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ------------------------------------------------------------------------------
"""Support functions for the command-line interpreter."""
import logging
import argparse
from chb.elfformat.ELFHeader import ELFHeader
from chb.peformat.PEHeader import PEHeader
import datetime
import json
import os
import shutil
import subprocess
import sys
from typing import (
Any,
Callable,
Type,
Union,
cast,
Dict,
Iterable,
List,
Optional,
NoReturn,
Set,
Tuple,
Sequence,
TYPE_CHECKING)
import xml.etree.ElementTree as ET
from chb.app.AppAccess import AppAccess
from chb.app.Assembly import Assembly
from chb.app.Callgraph import CallgraphNode
from chb.arm.ARMAccess import ARMAccess
from chb.arm.ARMAssembly import ARMAssembly
from chb.bctypes.BCFiles import BCFiles
from chb.cmdline.AnalysisManager import AnalysisManager
from chb.invariants.InputConstraint import InputConstraint
from chb.invariants.XXpr import XXpr
from chb.invariants.XVariable import XVariable
from chb.jsoninterface.JSONSchemaRegistry import json_schema_registry
from chb.mips.MIPSAccess import MIPSAccess
from chb.mips.MIPSAssembly import MIPSAssembly
from chb.mips.MIPSCfgPath import MIPSCfgPath
from chb.mips.MIPSFunction import MIPSFunction
from chb.mips.MIPSInstruction import MIPSInstruction
from chb.pwr.PowerAccess import PowerAccess
import chb.cmdline.jsonresultutil as JU
import chb.cmdline.XInfo as XI
import chb.graphics.DotCfg as DC
from chb.graphics.DotCallgraph import DotCallgraph
import chb.models.FunctionSummary as F
import chb.models.ModelsAccess as M
from chb.userdata.UserHints import UserHints
from chb.util.Config import Config
import chb.util.DotGraph as DG
import chb.util.dotutil as UD
import chb.util.fileutil as UF
from chb.util.loggingutil import chklogger, LogLevel
import chb.util.xmlutil as UX
from chb.app.Instruction import Instruction
from chb.x86.X86Access import X86Access
if TYPE_CHECKING:
import chb.app.Instruction
import chb.arm.ARMInstruction
from chb.bctypes.BCCompInfo import BCCompInfo
from chb.bctypes.BCTyp import BCTypComp, BCTypFun, BCTypNamed, BCTypPtr
from chb.invariants.InvariantFact import RelationalFact
import chb.mips.MIPSInstruction
import chb.x86.X86Instruction
def print_error(m: str) -> None:
    """Write message m to stderr, framed above and below by a row of stars."""
    frame = "*" * 80
    for line in (frame, m, frame):
        sys.stderr.write(line + "\n")
def print_status_update(m: str) -> None:
    """Emit a one-line status message to stderr, tagged with the chkx prefix."""
    sys.stderr.write(f"[chkx] {m}\n")
def print_info(m: str) -> None:
    """Print m to stdout between two dashed separator lines."""
    rule = "-" * 80
    print(rule)
    print(m)
    print(rule)
def get_path_filename(xname: str) -> Tuple[str, str]:
    """Return (directory, basename) for xname, checking that the file exists.

    Raises UF.CHBFileNotFoundError if xname does not name an existing file.
    """
    fullname = os.path.abspath(xname)
    if not os.path.isfile(fullname):
        raise UF.CHBFileNotFoundError(fullname)
    return (os.path.dirname(fullname), os.path.basename(fullname))
def set_logging(
        level: str,
        path: str,
        logfilename: Optional[str],
        msg: str = "",
        mode: str = "a") -> None:
    """Configure the chkx logger when a level or a log file is requested.

    If logfilename is given it is interpreted relative to path. Does nothing
    when the level is not a recognized LogLevel and no log file was requested.
    """
    wants_logging = (level in LogLevel.all()) or (logfilename is not None)
    if not wants_logging:
        return
    if logfilename is not None:
        logfilename = os.path.join(path, logfilename)
    chklogger.set_chkx_logger(
        msg, level=level, logfilename=logfilename, mode=mode)
def create_xinfo(path: str, xfile: str) -> XI.XInfo:
    """Determine the executable's characteristics using the file utility."""
    result = XI.XInfo()
    result.discover(path, xfile)
    return result
def get_format(name: str) -> Union[Type[PEHeader], Type[ELFHeader]]:
    """Map a file-format name to its header class.

    Raises ValueError for names other than elf, pe, or pe32.
    """
    if name == "elf":
        return ELFHeader
    elif name == "pe" or name == "pe32":
        return PEHeader
    else:
        raise ValueError("Unknown format name: %s" % name)
def get_app(path: str, xfile: str, xinfo: XI.XInfo) -> AppAccess:
    """Return the architecture-specific AppAccess object for the executable.

    Raises UF.CHBError when the architecture is not one of x86, mips, arm,
    or power.
    """
    arch = xinfo.architecture
    # renamed from 'format' to avoid shadowing the builtin
    fileformat = get_format(xinfo.format)
    if arch == "x86":
        return X86Access(path, xfile, fileformat=fileformat)
    elif arch == "mips":
        return MIPSAccess(path, xfile, fileformat=fileformat)
    elif arch == "arm":
        return ARMAccess(path, xfile, fileformat=fileformat)
    elif arch == "power":
        return PowerAccess(path, xfile, fileformat=fileformat)
    else:
        # Previously fell through to PowerAccess for any unrecognized
        # architecture, which silently misinterprets the binary; fail
        # explicitly instead (restores the commented-out raise).
        raise UF.CHBError("Architecture " + arch + " not yet supported")
def get_asm(app: AppAccess) -> Assembly:
    """Return the assembly listing for app (supported for MIPS and ARM only).

    Exits the process with an error message for unsupported architectures.
    """
    if isinstance(app, MIPSAccess):
        # isinstance already narrows the type; the previous explicit
        # cast(MIPSAccess, app) was a redundant no-op and has been removed.
        return MIPSAssembly(app, UF.get_mips_asm_xnode(app.path, app.filename))
    elif isinstance(app, ARMAccess):
        return ARMAssembly(app, UF.get_arm_asm_xnode(app.path, app.filename))
    else:
        print_error("Simulation not yet supported for " + app.__class__.__name__)
        exit(1)
def setup_directories(path: str, xfile: str) -> None:
    """Create the x.ch directory structure for the executable.

    Creates the executable directory plus the userdata, analysis, and
    results directories, each with a functions subdirectory.
    """
    def makedirc(name: str) -> None:
        # exist_ok=True makes this idempotent and avoids the
        # check-then-create race of the previous isdir/makedirs pair.
        os.makedirs(name, exist_ok=True)

    makedirc(UF.get_executable_dir(path, xfile))
    # user data
    udir = UF.get_userdata_dir(path, xfile)
    makedirc(udir)
    makedirc(os.path.join(udir, "functions"))
    # analysis intermediate
    adir = UF.get_analysis_dir(path, xfile)
    makedirc(adir)
    makedirc(os.path.join(adir, "functions"))
    # results
    rdir = UF.get_results_dir(path, xfile)
    makedirc(rdir)
    makedirc(os.path.join(rdir, "functions"))
def setup_user_data(
        path: str,
        xfile: str,
        hints: List[str],
        thumb: List[str],
        md5: str) -> UserHints:
    """Convert hints and command-line options to xml user data.

    Merges, in order: userdata registered for this executable's md5,
    command-line options registered for the md5 (which may override the
    thumb argument), the --thumb values given directly on the command
    line, and finally any json hints files. The merged hints are saved
    as userdata for (path, xfile) before being returned.

    Exits the process with an error message if a hints file cannot be
    read or lacks a top-level "userdata" entry.
    """
    userhints = UserHints()
    # check for registered userdata (keyed by the executable's md5)
    if UF.file_has_registered_userdata(md5):
        print_status_update("Use registered userdata.")
        userdata = UF.get_file_registered_userdata(md5)
        userhints.add_hints(userdata)
    # check for registered options; a registered "thumb" option replaces
    # the thumb list passed in by the caller
    if UF.file_has_registered_options(md5):
        cmdline_options = UF.get_file_registered_options(md5)
        if "thumb" in cmdline_options["options"]:
            thumb = cmdline_options["options"]["thumb"]
            print_status_update("Use command-line options for " + cmdline_options["name"] + ": ")
            print_status_update(" --thumb " + " ".join(thumb))
            armuserdata: Dict[str, List[str]] = {}
            armuserdata["arm-thumb"] = thumb
            userhints.add_hints(armuserdata)
    # check direct command-line options
    if len(thumb) > 0:
        print_status_update("Use command-line options for thumb: ")
        print_status_update(" --thumb " + " ".join(thumb))
        cmdarmuserdata: Dict[str, List[str]] = {}
        cmdarmuserdata["arm-thumb"] = thumb
        userhints.add_hints(cmdarmuserdata)
    # read hints files (paths resolved relative to the current directory)
    # os.chdir(path)
    filenames = [os.path.abspath(s) for s in hints]
    if len(filenames) > 0:
        print_status_update("Use hints files: " + ", ".join(filenames))
        for f in filenames:
            try:
                with open(f, "r") as fp:
                    fuserdata = json.load(fp)
                if "userdata" in fuserdata:
                    userhints.add_hints(fuserdata["userdata"])
                else:
                    print_error(
                        "Expected to find userdata in " + f)
                    exit(1)
            except Exception as e:
                # any parse or I/O failure on a hints file is fatal
                print_error(
                    "Error in reading " + f + ": " + str(e))
                exit(1)
    # persist the merged hints as xml userdata for this executable
    userhints.save_userdata(path, xfile)
    return userhints
def prepare_executable(
        path: str,
        xfile: str,
        doreset: bool,
        doresetx: bool,
        verbose: bool = False,
        exclude_debug: bool = False,
        hints: Optional[List[str]] = None,
        thumb: Optional[List[str]] = None) -> UserHints:
    """Extracts executable and sets up necessary directory structure.

    If an extracted tar.gz already exists it is unpacked (honoring the
    reset flags); otherwise the executable content is extracted from
    scratch via the analyzer. Returns the UserHints assembled from the
    hints files and thumb options.

    Raises UF.CHBError on reset/unpack/extract failures or unsupported
    file formats.
    """
    # Previously these defaulted to mutable lists ([]), a shared-default
    # pitfall; None sentinels are backward-compatible for all callers.
    hints = [] if hints is None else hints
    thumb = [] if thumb is None else thumb
    xtargz = UF.get_executable_targz_filename(path, xfile)
    xfilename = os.path.join(path, xfile)
    if doresetx:
        # full reset: remove both the tar.gz and the extracted x.ch tree,
        # but only if the original executable is still present to re-extract
        if os.path.isfile(xtargz):
            if not os.path.isfile(xfilename):
                raise UF.CHBError(
                    "Warning: executable file does not exist. "
                    + "Not removing the extracted content file.")
            else:
                chdir = UF.get_ch_dir(path, xfile)
                print_status_update("Remove " + xtargz)
                os.remove(xtargz)
                shutil.rmtree(chdir)
        else:
            pass
    if os.path.isfile(xtargz):
        chdir = UF.get_ch_dir(path, xfile)
        if os.path.isdir(chdir) and not (doreset or doresetx):
            # everything is in place
            return UserHints()  # TODO: to be changed
        if os.path.isdir(chdir) and (doreset or doresetx):
            # remove existing x.ch directory
            print_status_update("Removing " + chdir)
            shutil.rmtree(chdir)
        # unpack existing targz file
        if UF.unpack_tar_file(path, xfile):
            print_status_update(
                "Successfully extracted "
                + UF.get_executable_targz_filename(path, xfile))
        else:
            raise UF.CHBError("Error in unpacking tar.gz file")
        # set up user data from hints files
        xinfo = XI.XInfo()
        xinfo.load(path, xfile)
        setup_directories(path, xfile)
        userhints = setup_user_data(path, xfile, hints, thumb, xinfo.md5)
        return userhints
    # executable content has to be extracted
    else:
        xinfo = create_xinfo(path, xfile)
        # check architecture and file format
        # NOTE(review): an unsupported architecture only prints a message
        # while an unsupported format raises; looks inconsistent — confirm
        # whether falling through here is intentional.
        if not (xinfo.is_x86
                or xinfo.is_power
                or xinfo.is_mips
                or xinfo.is_arm):
            print("Architecture "
                  + xinfo.architecture
                  + " not supported")
        if not (xinfo.is_pe32 or xinfo.is_elf):
            raise UF.CHBError("File format "
                              + xinfo.format
                              + " not supported")
        # set up directories and user data
        setup_directories(path, xfile)
        userhints = setup_user_data(path, xfile, hints, thumb, xinfo.md5)
        xinfo.save(path, xfile)
        # extract executable content
        am = AnalysisManager(
            path,
            xfile,
            xinfo.size,
            mips=xinfo.is_mips,
            arm=xinfo.is_arm,
            power=xinfo.is_power,
            elf=xinfo.is_elf,
            exclude_debug=exclude_debug)
        print_status_update("Extracting executable content into xml ...")
        result = am.extract_executable(
            chcmd="-extract", verbose=verbose)
        if not (result == 0):
            raise UF.CHBError("Error in extracting executable")
        # save the targz file
        am.save_extract()
        return userhints
def analyzecmd(args: argparse.Namespace) -> NoReturn:
    """Invoke analyzer to extract, or disassemble, do full analysis.

    Drives the full pipeline: verify the analyzer binary exists, prepare
    (extract) the executable, optionally preprocess C header files with
    gcc, assemble the function-inclusion lists, and then either
    disassemble (-d) or run the full analysis. Always terminates the
    process via exit().
    """
    # arguments (pulled off the argparse namespace up front)
    xname: str = args.xname
    doreset: bool = args.reset
    doresetx: bool = args.resetx
    exclude_debug: bool = args.exclude_debug
    dodisassemble: bool = args.disassemble
    savedatablocks: str = args.save_datablocks
    outputfile: str = args.outputfile
    doextract: bool = args.extract
    verbose: bool = args.verbose
    collectdiagnostics: bool = args.collect_diagnostics
    failonfunctionfailure: bool = args.fail_on_function_failure
    save_asm: bool = args.save_asm
    save_asm_cfg_info: bool = args.save_asm_cfg_info
    thumb: List[str] = args.thumb
    preamble_cutoff: int = args.preamble_cutoff
    iterations: int = args.iterations
    analysisrepeats: int = args.analysisrepeats
    deps: List[str] = args.thirdpartysummaries
    so_libraries: List[str] = args.so_libraries
    skip_if_asm: bool = args.skip_if_asm
    skip_if_metrics: bool = args.skip_if_metrics
    hints: List[str] = args.hints  # names of json files
    headers: List[str] = args.headers  # names of c files
    fns_no_lineq: List[str] = args.fns_no_lineq  # function hex addresses
    fns_exclude: List[str] = args.fns_exclude  # function hex addresses
    fns_include: List[str] = args.fns_include  # function hex addresses
    analyze_all_named: bool = args.analyze_all_named
    analyze_range_entry_points: List[str] = args.analyze_range_entry_points
    gc_compact: int = args.gc_compact
    construct_all_functions: bool = args.construct_all_functions
    show_function_timing: bool = args.show_function_timing
    lineq_instr_cutoff: int = args.lineq_instr_cutoff
    lineq_block_cutoff: int = args.lineq_block_cutoff
    xssa: bool = args.ssa  # use ssa in analysis
    xnovarinvs: bool = args.no_varinvs  # don't generate var invariants
    xarmextensionregisters: bool = args.arm_extension_registers
    loglevel: str = args.loglevel
    logfilename: Optional[str] = args.logfilename
    logfilemode: str = args.logfilemode

    # analyzing a single function implies failing hard if it fails
    failonfunctionfailure = failonfunctionfailure or len(fns_include) == 1

    # the analyzer binary must be present before anything else
    if not os.path.isfile(Config().chx86_analyze):
        print_error(
            "CodeHawk analyzer executable not found.\n"
            + ("~" * 80) + "\n"
            + "Copy CHB/bchcmdline/chx86_analyze from the (compiled) "
            + "codehawk repository to the\nchb/bin/binaries/linux directory "
            + "in this directory, or\n"
            + "set up ConfigLocal.py with another location for chx86_analyze")
        exit(1)
    try:
        (path, xfile) = get_path_filename(xname)
    except UF.CHBError as e:
        print(str(e.wrap()))
        exit(1)
    set_logging(
        loglevel,
        path,
        logfilename=logfilename,
        mode=logfilemode,
        msg="analyze invoked")
    if skip_if_asm and UF.has_asm_results(path, xfile):
        # we have what we need
        print_status_update("Skip disassembly of " + xname)
        exit(0)
    if skip_if_metrics and UF.has_analysis_results(path, xfile):
        # we have what we need
        print_status_update("Skip analysis of " + xname)
        exit(0)
    try:
        userhints = prepare_executable(
            path,
            xfile,
            doreset,
            doresetx,
            verbose=verbose,
            hints=hints,
            exclude_debug=exclude_debug,
            thumb=thumb)
    except UF.CHBError as e:
        print_error(str(e.wrap()))
        exit(1)
    if doextract:
        # we are done
        chklogger.logger.info("analyze -x completed")
        exit(0)
    xinfo = XI.XInfo()
    xinfo.load(path, xfile)
    # preprocess c header files with gcc -E; each foo.h (or foo.c) yields
    # a preprocessed foo.i that is passed on to the analyzer
    print_status_update(
        "Preprocessing c header files from directory " + os.getcwd())
    ifilenames: List[str] = []
    headerfilenames = [os.path.abspath(s) for s in headers]
    if len(headers) > 0:
        for f in headerfilenames:
            if os.path.isfile(f):
                print_status_update("Use header file: " + f)
                ifilename = f[:-2] + ".i"
                ifilenames.append(ifilename)
                gcccmd = ["gcc", "-std=gnu99", "-m32", "-E", "-o", ifilename, f]
                p = subprocess.call(gcccmd, cwd=path, stderr=subprocess.STDOUT)
                if not (p == 0):
                    print_error("Error in " + str(gcccmd))
                    exit(1)
            else:
                print_error("Header file " + f + " not found")
                exit(1)
    # expand fns_include with all named functions, if requested
    if analyze_all_named:
        fnnamed_addrs = userhints.rev_function_names().values()
        fns_include = fns_include + list(fnnamed_addrs)
        chklogger.logger.info("Include %d functions", len(fns_include))
    # expand fns_include with entry points in the given address range
    if len(analyze_range_entry_points) == 2:
        festart = int(analyze_range_entry_points[0], 16)
        fefin = int(analyze_range_entry_points[1], 16)
        fentrypoints = userhints.function_entry_points()
        feincludes: List[str] = []
        for fe in fentrypoints:
            eint = int(fe, 16)
            if eint >= festart and eint <= fefin:
                feincludes.append(fe)
        fns_include = fns_include + feincludes
        chklogger.logger.info(
            "Include %d entry point functions in range %s - %s",
            len(feincludes), hex(festart), hex(fefin))
    am = AnalysisManager(
        path,
        xfile,
        xinfo.size,
        mips=xinfo.is_mips,
        arm=xinfo.is_arm,
        power=xinfo.is_power,
        elf=xinfo.is_elf,
        savedatablocks=(savedatablocks is not None),
        deps=deps,
        so_libraries=so_libraries,
        ifilenames=ifilenames,
        fns_no_lineq=fns_no_lineq,
        fns_exclude=fns_exclude,
        fns_include=fns_include,
        gc_compact=gc_compact,
        show_function_timing=show_function_timing,
        lineq_instr_cutoff=lineq_instr_cutoff,
        lineq_block_cutoff=lineq_block_cutoff,
        use_ssa=xssa,
        no_varinvs=xnovarinvs,
        include_arm_extension_registers=xarmextensionregisters,
        thumb=(len(thumb) > 0))
    if dodisassemble:
        # disassembly only (-d)
        try:
            am.disassemble(
                verbose=verbose,
                collectdiagnostics=collectdiagnostics,
                preamble_cutoff=preamble_cutoff,
                save_asm_cfg_info=save_asm_cfg_info)
        except subprocess.CalledProcessError as e:
            print_error(str(e.output))
            print_error(str(e))
            exit(1)
        except UF.CHBError as e:
            print_error(str(e.wrap()))
            exit(1)
        # optionally save data blocks in the given address range as userdata
        if savedatablocks is not None and outputfile is not None:
            (startaddr, endaddr) = savedatablocks.split(":")
            app = get_app(path, xfile, xinfo)
            systeminfo = app.systeminfo
            datablocks = systeminfo.datablocks.datablocks_in_range(
                startaddr, endaddr)
            userdata: Dict[str, Any] = {}
            udata = userdata["userdata"] = {}
            dbdata = udata["data-blocks"] = []
            for db in datablocks:
                dbrec: Dict = {}
                dbrec["r"] = [db.startaddr, db.endaddr]
                dbdata.append(dbrec)
            with open(outputfile + ".json", "w") as fp:
                json.dump(userdata, fp, indent=2)
        chklogger.logger.info("analyze -d completed")
        exit(0)
    else:
        # full analysis
        try:
            am.analyze(
                analysisrepeats=analysisrepeats,
                iterations=iterations,
                verbose=verbose,
                save_asm=save_asm,
                construct_all_functions=construct_all_functions,
                collectdiagnostics=collectdiagnostics,
                failonfunctionfailure=failonfunctionfailure,
                preamble_cutoff=preamble_cutoff)
        except subprocess.CalledProcessError as e:
            print_error(
                "Analysis failed.\n Return code: "
                + str(e.returncode)
                + "\n Command: "
                + str(e.cmd)
                + "\n Output: "
                + str(e.output)
                + "\n Stderr: "
                + str(e.stderr)
                + "\n"
                + ("-" * 80)
                + "\n"
                + str(e))
            exit(1)
        except UF.CHBError as e:
            print_error(str(e.wrap()))
            exit(1)
        chklogger.logger.info("analyze completed")
        exit(0)
def results_stats(args: argparse.Namespace) -> NoReturn:
    """Prints out a summary of the analysis results per function.

    Optionally filters/annotates functions with tags from a json tagfile,
    sorts by the requested metric, and can additionally emit function
    metrics and opcode statistics as json files. Always terminates the
    process via exit().
    """
    # arguments
    xname: str = str(args.xname)
    nocallees: bool = args.nocallees
    sortby: str = args.sortby
    timeshare: int = args.timeshare
    opcodes: str = args.opcodes
    functionmetrics: str = args.functionmetrics
    hide: List[str] = args.hide
    tagfile: Optional[str] = args.tagfile
    loglevel: str = args.loglevel
    logfilename: Optional[str] = args.logfilename
    logfilemode: str = args.logfilemode
    try:
        (path, xfile) = get_path_filename(xname)
        UF.check_analysis_results(path, xfile)
    except UF.CHBError as e:
        print_error(str(e.wrap()))
        exit(1)
    set_logging(
        loglevel,
        path,
        logfilename=logfilename,
        mode=logfilemode,
        msg="results stats invoked")
    # load optional function tags: faddr -> list of tag strings
    tagdata: Dict[str, Any] = {}
    if tagfile is not None:
        with open(tagfile, "r") as fp:
            tagdata = json.load(fp)
    functiontags: Dict[str, List[str]] = {}
    if "function-tags" in tagdata:
        functiontags = tagdata["function-tags"]
    # column width for the tag field: longest comma-joined tag list + 4
    maxlen = 0
    for (faddr, keys) in functiontags.items():
        taglen = len(",".join(keys))
        if taglen > maxlen:
            maxlen = taglen
    maxlen = maxlen + 4 if maxlen > 0 else 0
    xinfo = XI.XInfo()
    xinfo.load(path, xfile)
    app = get_app(path, xfile, xinfo)
    stats = app.result_metrics
    print(stats.header_to_string(hide=hide))
    # choose the sort key for the per-function listing
    if sortby == "instrs":
        sortkey = lambda f: f.instruction_count
    elif sortby == "basicblocks":
        sortkey = lambda f: (f.block_count, f.instruction_count, int(f.faddr, 16))
    elif sortby == "loopdepth":
        sortkey = lambda f: f.loop_depth
    elif sortby == "time":
        sortkey = lambda f: f.time
    else:
        # default: sort by function address
        sortkey = lambda f: int(f.faddr, 16)
    for f in sorted(stats.get_function_results(), key=sortkey):
        if f.faddr in functiontags:
            fn_tags = functiontags[f.faddr]
        else:
            fn_tags = []
        # a "hide" tag suppresses the function from the listing entirely
        if "hide" in fn_tags:
            continue
        print(f.metrics_to_string(shownocallees=nocallees, hide=hide,
                                  tags=fn_tags, taglen=maxlen))
    print(stats.disassembly_to_string())
    print(stats.analysis_to_string())
    # report the functions that consumed the largest share of analysis time
    if timeshare > 0:
        topanalysistimes = stats.time_share(timeshare)
        toptotal = sum(topanalysistimes.values())
        print("\nFunctions taking up most analysis time:")
        print("\nAddress share (%)")
        print("-----------------------")
        for (s, t) in topanalysistimes.items():
            print(s.ljust(14) + "{:4.2f}".format(100.0 * t).rjust(6))
        print("-----------------------")
        print("Total".ljust(14) + "{:4.2f}".format(100.0 * toptotal).rjust(6))
    # optionally dump per-function metrics as <functionmetrics>.json
    if functionmetrics:
        filename = functionmetrics + ".json"
        content: Dict[str, Any] = {}
        content["filename"] = xname
        content["functions"] = []
        for f in sorted(stats.get_function_results(), key=lambda f: f.faddr):
            content["functions"].append(f.to_json_result().content)
        jsonok = JU.jsonok("functionmetrics", content)
        with open(filename, "w") as fp:
            json.dump(jsonok, fp, indent=4)
    # optionally dump opcode statistics as <opcodes>.json; for ARM,
    # cross-reference against the configured opcode-support table
    if opcodes:
        filename = opcodes + ".json"
        opcstats = app.mnemonic_stats()
        if xinfo.is_arm:
            opccovered = Config().armopcodes
            with open(opccovered, "r") as fp:
                opcsupport: Dict[str, Any] = json.load(fp)["instructions"]
        else:
            opcsupport = {}
        result: Dict[str, Any] = {}
        result["name"] = xname
        result["md5"] = xinfo.md5
        result["opcodes"] = {}
        for (opc, count) in sorted(opcstats.items()):
            opcrec: Dict[str, Any] = {}
            opcrec["count"] = count
            opcrec["support"] = []
            # mark opcodes supported by the AST converter ("ASTC")
            if opc in opcsupport:
                if "ASTC" in opcsupport[opc]:
                    if opcsupport[opc]["ASTC"] == "Y":
                        opcrec["support"].append("ASTC")
            result["opcodes"][opc] = opcrec
        with open(filename, "w") as fp:
            json.dump(result, fp, sort_keys=True, indent=2)
        chklogger.logger.info("opcodes saved to " + filename)
        print("\nOpcode stats")
        print("-" * 80)
        for (opc, opcr) in sorted(result["opcodes"].items()):
            p_opcsupport = ""
            if len(opcr["support"]) > 0:
                p_opcsupport = "(" + ", ".join(opcr["support"]) + ")"
            print(opc.ljust(10) + ": "
                  + str(opcr["count"]).rjust(6)
                  + " " + p_opcsupport)
        print("=" * 80)
    chklogger.logger.info("results stats completed")
    exit(0)
def results_callgraph(args: argparse.Namespace) -> NoReturn:
    """Generates a callgraph in dot and converts it to pdf.

    The graph can be constrained to paths from given sources and/or to
    given sinks, filtered to hide library functions and unknown call
    targets, and aligned (ranked) on its root and/or sink nodes. Always
    terminates the process via exit().
    """
    # arguments
    xname: str = args.xname
    output: str = args.output
    hidelibs: bool = args.hide_lib_functions
    hideunknowns: bool = args.hide_unknown_targets
    reverse: bool = args.reverse
    align: str = args.align
    sources: List[str] = args.sources
    sinks: List[str] = args.sinks
    loglevel: str = args.loglevel
    logfilename: Optional[str] = args.logfilename
    logfilemode: str = args.logfilemode
    try:
        (path, xfile) = get_path_filename(xname)
        UF.check_analysis_results(path, xfile)
    except UF.CHBError as e:
        print(str(e.wrap()))
        exit(1)
    set_logging(
        loglevel,
        path,
        logfilename=logfilename,
        mode=logfilemode,
        msg="results callgraph invoked")
    xinfo = XI.XInfo()
    xinfo.load(path, xfile)
    app = get_app(path, xfile, xinfo)

    def getcolor(node: CallgraphNode) -> str:
        # node fill color by node kind: library (green), unknown target
        # (yellow), call-back-table/tagged (orange), application (lightblue)
        if node.is_lib_node:
            return "green"
        elif node.is_unknown_tgt:
            return "yellow"
        elif node.is_call_back_table_node or node.is_tagged_app_node:
            return "orange"
        else:
            return "lightblue"

    def nodefilter(node: CallgraphNode) -> bool:
        # return True if the node should be included in the output graph
        if hidelibs and node.is_lib_node:
            return False
        elif hideunknowns and node.is_unknown_tgt:
            return False
        else:
            return True

    callgraph = app.callgraph()
    # constrain the graph before rendering, if sources/sinks were given
    if len(sources) > 0:
        callgraph = callgraph.constrain_sources(sources)
    if len(sinks) > 0:
        callgraph = callgraph.constrain_sinks(sinks)

    def sameleftrank(node: CallgraphNode) -> bool:
        # root nodes are placed on the same (left) rank
        return callgraph.is_root_node(node.name)

    def samerightrank(node: CallgraphNode) -> bool:
        # sink nodes are placed on the same (right) rank
        return callgraph.is_sink_node(node.name)

    samerank: List[Callable[[CallgraphNode], bool]] = []
    if align == "left":
        samerank = [sameleftrank]
    elif align == "right":
        samerank = [samerightrank]
    elif align == "both":
        samerank = [sameleftrank, samerightrank]
    dotgraph = DotCallgraph(
        "callgraph",
        callgraph,
        reverse=reverse,
        getcolor=getcolor,
        nodefilter=nodefilter,
        samerank=samerank).to_dotgraph()
    pdffilename = UD.print_dot(app.path, output, dotgraph)
    if os.path.isfile(pdffilename):
        print_info(
            "Call graph for " + xname + " has been saved in " + pdffilename)
    else:
        print_error(
            "Error in converting dot file to pdf: file "
            + pdffilename
            + " not found")
        exit(1)
    chklogger.logger.info("results callgraph completed")
    exit(0)
def results_globalvars(args: argparse.Namespace) -> NoReturn:
    """Report, per function, the global variables written and referenced.

    Prints per-function listings, per-variable summaries with occurrence
    counts, and overall statistics, then terminates the process.
    """
    xname: str = str(args.xname)
    try:
        (path, xfile) = get_path_filename(xname)
        UF.check_analysis_results(path, xfile)
    except UF.CHBError as e:
        print(str(e.wrap()))
        exit(1)
    xinfo = XI.XInfo()
    xinfo.load(path, xfile)
    app = get_app(path, xfile, xinfo)
    print("Base address: " + app.header.image_base)
    print("Max address: " + app.header.max_address_space)
    (lhsglobals, rhsglobals) = app.global_refs()

    # variable/expression text -> function address -> occurrence count
    assigned: Dict[str, Dict[str, int]] = {}
    referenced: Dict[str, Dict[str, int]] = {}

    print("Global variables that get assigned:")
    print("-----------------------------------")
    for faddr in sorted(lhsglobals):
        print("Function " + faddr)
        for v in lhsglobals[faddr]:
            print(" " + str(v))
            perfn = assigned.setdefault(str(v), {})
            perfn[faddr] = perfn.get(faddr, 0) + 1

    total_lhs_assignments = sum(len(vs) for vs in lhsglobals.values())

    for gv in sorted(assigned):
        print("\nGlobal variable " + gv)
        for faddr in sorted(assigned[gv]):
            print(" " + faddr + ": " + str(assigned[gv][faddr]))

    print("\nGlobal variables that are referenced:")
    print("---------------------------------------")
    for faddr in sorted(rhsglobals):
        print("Function " + faddr)
        for x in rhsglobals[faddr]:
            print(" " + str(x))
            perfn = referenced.setdefault(str(x), {})
            perfn[faddr] = perfn.get(faddr, 0) + 1

    print("\nGlobal expressions (" + str(len(referenced)) + ")")
    print("---------------------------------------")
    for gx in sorted(referenced):
        print("\nGlobal expression " + gx)
        for faddr in sorted(referenced[gx]):
            print(" " + faddr + ": " + str(referenced[gx][faddr]))

    print("\nGlobal variable statistics")
    print("---------------------------------------")
    print("Number of functions in which globals get assigned: "
          + str(len(lhsglobals)))
    print("Total number of global variable assignments: "
          + str(total_lhs_assignments))
    print("Total number of functions in which globals get referenced: "
          + str(len(rhsglobals)))
    exit(0)
def results_classifyfunctions(args: argparse.Namespace) -> NoReturn:
"""Returns a classification of function based on a classifier."""
xname: str = str(args.xname)
classificationfile: str = str(args.classification_file)
with open(classificationfile, "r") as fp:
classifier = json.load(fp)
revclassifier: Dict[str, str] = {}
for category in classifier["classification"]:
for libfun in classifier["classification"][category]:
revclassifier[libfun] = category
try:
(path, xfile) = get_path_filename(xname)
UF.check_analysis_results(path, xfile)
except UF.CHBError as e:
print(str(e.wrap()))
exit(1)
xinfo = XI.XInfo()
xinfo.load(path, xfile)
app = get_app(path, xfile, xinfo)
fns = app.appfunction_addrs
classification: Dict[str, Dict[str, int]] = {} # faddr -> libcat -> count
for faddr in fns:
classification.setdefault(faddr, {})
f = app.function(faddr)
fcalls = f.call_instructions()
for baddr in fcalls:
for instr in fcalls[baddr]:
tgtname = instr.call_target.name
if tgtname in revclassifier:
category = revclassifier[tgtname]
classification[faddr].setdefault(category, 0)
classification[faddr][category] += 1
catfprevalence: Dict[str, int] = {}
catcprevalence: Dict[str, int] = {}
catstats: Dict[int, int] = {}
singlecat: Dict[str, int] = {}
doublecat: Dict[Tuple[str, str], int] = {}
for faddr in classification:
for cat in classification[faddr]:
catfprevalence.setdefault(cat, 0)
catcprevalence.setdefault(cat, 0)
catfprevalence[cat] += 1
catcprevalence[cat] += classification[faddr][cat]
numcats = len(classification[faddr])
catstats.setdefault(numcats, 0)
catstats[numcats] += 1
if numcats == 1:
cat = list(classification[faddr].keys())[0]
singlecat.setdefault(cat, 0)
singlecat[cat] += 1
if numcats == 2:
cats = sorted(list(classification[faddr].keys()))
cattuple = (cats[0], cats[1])
doublecat.setdefault(cattuple, 0)
doublecat[cattuple] += 1
for (m, c) in sorted(catstats.items()):
print(str(m).rjust(5) + ": " + str(c).rjust(5))
print("\nSingle category")
for (cat, count) in sorted(singlecat.items()):
print(str(count).rjust(5) + " " + cat)
print("\nDouble category")