Skip to content

Commit 39a5e15

Browse files
Include skiplist file in the analysis info
The skiplist file is already included in the store ZIP, but it is not currently stored in the database. This patch introduces a new table called AnalysisInfoFile, which is intended to store files related to analysis information. The actual file contents are stored separately in the FileContent table. With this PR, only the skipfile is included as an analysis info file, but this could be extended later.
1 parent 5ce99e3 commit 39a5e15

8 files changed

Lines changed: 178 additions & 38 deletions

File tree

web/server/codechecker_server/api/mass_store_run.py

Lines changed: 53 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515
from collections import defaultdict
1616
from datetime import datetime, timedelta
1717
import fnmatch
18+
import hashlib
1819
from hashlib import sha256
1920
import json
2021
import os
2122
from pathlib import Path
2223
import sqlalchemy
24+
from sqlalchemy.orm import Session as SA_Session
2325
import tempfile
2426
import time
2527
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, \
@@ -46,7 +48,7 @@
4648
from ..database.config_db_model import Product
4749
from ..database.database import DBSession
4850
from ..database.run_db_model import \
49-
AnalysisInfo, AnalysisInfoChecker, AnalyzerStatistic, \
51+
AnalysisInfo, AnalysisInfoChecker, AnalysisInfoFile, AnalyzerStatistic, \
5052
BugPathEvent, BugReportPoint, \
5153
Checker, \
5254
ExtendedReportData, \
@@ -814,8 +816,8 @@ def __add_file_content(
814816
self,
815817
session: DBSession,
816818
source_file_name: str,
817-
content_hash: Optional[str]
818-
):
819+
content_hash: Optional[str] = None
820+
) -> str:
819821
"""
820822
Add the necessary file contents. If content_hash is None then this
821823
function calculates the content hash. Or if it's available at the
@@ -871,6 +873,8 @@ def __add_file_content(
871873
# the meantime.
872874
session.rollback()
873875

876+
return content_hash
877+
874878
def __store_checker_identifiers(self, checkers: Set[Tuple[str, str]]):
875879
"""
876880
Stores the identifiers "(analyzer, checker_name)" in the database into
@@ -1000,6 +1004,28 @@ def __store_analysis_statistics(
10001004

10011005
session.add(analyzer_statistics)
10021006

1007+
def __store_analysis_info_files(
    self,
    session: SA_Session,
    analysis_info_id: int,
    report_dir_path: str
):
    """
    Store analyzer-related configuration files (currently only the skip
    file) for the given analysis info record.

    The file's contents are deduplicated through __add_file_content()
    into the FileContent table; only a (analysis_info_id, content_hash)
    link row is inserted here.
    """
    # Report directories inside the store ZIP are named by the MD5 hash
    # of the original report directory path (not security-relevant,
    # just a stable directory name).
    report_dir_hash = hashlib.md5(
        report_dir_path.encode('utf-8')).hexdigest()
    zip_report_dir = os.path.join(self._zip_dir, "reports", report_dir_hash)

    skip_file = os.path.join(zip_report_dir, 'skip_file')
    if not os.path.isfile(skip_file):
        # No skip file was used for this analysis: nothing to store.
        return

    content_hash = self.__add_file_content(session, skip_file)

    # The primary key is composite (analysis_info_id, content_hash);
    # only insert the link row if it does not exist yet.
    existing = session.get(
        AnalysisInfoFile, (analysis_info_id, content_hash))
    if existing is None:
        session.add(AnalysisInfoFile(
            analysis_info_id=analysis_info_id,
            filename="skip_file",
            content_hash=content_hash))
1028+
10031029
def __store_analysis_info(
10041030
self,
10051031
session: DBSession,
@@ -1012,37 +1038,30 @@ def __store_analysis_info(
10121038
analyzer_command.encode("utf-8"),
10131039
zlib.Z_BEST_COMPRESSION)
10141040

1015-
analysis_info_rows = session \
1016-
.query(AnalysisInfo) \
1017-
.filter(AnalysisInfo.analyzer_command == cmd) \
1018-
.all()
1019-
1020-
if analysis_info_rows:
1021-
# It is possible when multiple runs are stored
1022-
# simultaneously to the server with the same analysis
1023-
# command that multiple entries are stored into the
1024-
# database. In this case we will select the first one.
1025-
analysis_info = analysis_info_rows[0]
1026-
else:
1027-
analysis_info = AnalysisInfo(analyzer_command=cmd)
1028-
1029-
# Obtain the ID eagerly to be able to use the M-to-N table.
1030-
session.add(analysis_info)
1031-
session.flush()
1032-
session.refresh(analysis_info, ["id"])
1033-
1034-
for analyzer in mip.analyzers:
1035-
q = session \
1036-
.query(Checker) \
1037-
.filter(Checker.analyzer_name == analyzer)
1038-
db_checkers = {r.checker_name: r for r in q.all()}
1039-
1040-
connection_rows = [AnalysisInfoChecker(
1041-
analysis_info, db_checkers[chk], is_enabled)
1042-
for chk, is_enabled
1043-
in mip.checkers.get(analyzer, {}).items()]
1044-
for r in connection_rows:
1045-
session.add(r)
1041+
analysis_info = AnalysisInfo(analyzer_command=cmd)
1042+
1043+
# Obtain the ID eagerly to be able to use the M-to-N table.
1044+
session.add(analysis_info)
1045+
session.flush()
1046+
session.refresh(analysis_info, ["id"])
1047+
1048+
for analyzer in mip.analyzers:
1049+
q = session \
1050+
.query(Checker) \
1051+
.filter(Checker.analyzer_name == analyzer)
1052+
db_checkers = {r.checker_name: r for r in q.all()}
1053+
1054+
connection_rows = [AnalysisInfoChecker(
1055+
analysis_info, db_checkers[chk], is_enabled)
1056+
for chk, is_enabled
1057+
in mip.checkers.get(analyzer, {}).items()]
1058+
for r in connection_rows:
1059+
session.add(r)
1060+
1061+
if mip.report_dir_path:
1062+
self.__store_analysis_info_files(session,
1063+
analysis_info.id,
1064+
mip.report_dir_path)
10461065

10471066
run_history.analysis_info.append(analysis_info)
10481067
self.__analysis_info[src_dir_path] = analysis_info

web/server/codechecker_server/api/report_server.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@
6363
from ..database.config_db_model import Product
6464
from ..database.database import conv, DBSession, escape_like
6565
from ..database.run_db_model import \
66-
AnalysisInfo, AnalysisInfoChecker as DB_AnalysisInfoChecker, \
66+
AnalysisInfo, \
67+
AnalysisInfoChecker as DB_AnalysisInfoChecker, AnalysisInfoFile, \
6768
AnalyzerStatistic, \
6869
BugPathEvent, BugReportPoint, \
6970
CleanupPlan, CleanupPlanReportHash, Checker, Comment, \
@@ -1723,6 +1724,19 @@ def getAnalysisInfo(self, analysis_info_filter, limit, offset):
17231724
checkers[analyzer][checker] = API_AnalysisInfoChecker(
17241725
enabled=enabled)
17251726

1727+
# Collect the configuration files stored for this analysis info
# (currently only the skip file); the compressed contents live in
# the file_contents table, joined by content hash.
analysis_config_files = session \
    .query(AnalysisInfoFile.filename,
           FileContent.content) \
    .join(FileContent, AnalysisInfoFile.content_hash
          == FileContent.content_hash) \
    .filter(AnalysisInfoFile.analysis_info_id
            == cmd.id).all()

# Append each file's name and decompressed content to the command
# string so it is shown as part of the analysis info.
for filename, content in analysis_config_files:
    # Interpolate the stored filename (e.g. "skip_file:") — a literal
    # placeholder here would hide which file the content belongs to.
    command += f"\n\n{filename}:\n"
    command += zlib.decompress(content).decode("utf-8")
1739+
17261740
res.append(ttypes.AnalysisInfo(
17271741
analyzerCommand=html.escape(command),
17281742
checkers=checkers))

web/server/codechecker_server/database/db_cleanup.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from typing import Dict
1414

1515
import sqlalchemy
16+
from sqlalchemy import union
1617

1718
from codechecker_api.codeCheckerDBAccess_v6.ttypes import Severity
1819

@@ -21,7 +22,7 @@
2122

2223
from .database import DBSession
2324
from .run_db_model import \
24-
AnalysisInfo, \
25+
AnalysisInfo, AnalysisInfoFile, \
2526
BugPathEvent, BugReportPoint, \
2627
Comment, Checker, \
2728
File, FileContent, \
@@ -108,8 +109,9 @@ def remove_unused_files(product):
108109
if total_count:
109110
LOG.debug("%d dangling files deleted.", total_count)
110111

111-
files = session.query(File.content_hash) \
112-
.group_by(File.content_hash)
112+
files = union(
113+
session.query(File.content_hash),
114+
session.query(AnalysisInfoFile.content_hash))
113115

114116
session.query(FileContent) \
115117
.filter(FileContent.content_hash.notin_(files)) \

web/server/codechecker_server/database/run_db_model.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,41 @@ def __init__(self,
7979
self.enabled = is_enabled
8080

8181

82+
class AnalysisInfoFile(Base):
    """
    Link table associating an AnalysisInfo record with files related to
    the analysis (e.g. the skip file). The actual file contents are
    stored separately in the file_contents table and referenced here by
    content hash.
    """
    __tablename__ = "analysis_info_files"

    # Part of the composite primary key; cascading delete so link rows
    # disappear together with their analysis_info record.
    analysis_info_id = Column(Integer,
                              ForeignKey("analysis_info.id",
                                         deferrable=True,
                                         initially="DEFERRED",
                                         ondelete="CASCADE"),
                              primary_key=True)

    # Name of the file inside the stored report (e.g. "skip_file").
    filename = Column(String, nullable=False)

    # Second part of the composite primary key; points at the
    # deduplicated (compressed) contents in file_contents.
    content_hash = Column(String,
                          ForeignKey("file_contents.content_hash",
                                     deferrable=True,
                                     initially="DEFERRED",
                                     ondelete="CASCADE"),
                          primary_key=True)

    def __init__(self,
                 analysis_info_id: int,
                 filename: str,
                 content_hash: str):
        """Create a link row for one analysis-related file."""
        self.analysis_info_id = analysis_info_id
        self.filename = filename
        self.content_hash = content_hash
108+
109+
82110
class AnalysisInfo(Base):
83111
__tablename__ = "analysis_info"
84112

85113
id = Column(Integer, autoincrement=True, primary_key=True)
86114
analyzer_command = Column(LargeBinary)
87115
available_checkers = relationship(AnalysisInfoChecker, uselist=True)
116+
analyzer_files = relationship(AnalysisInfoFile, uselist=True)
88117

89118
def __init__(self, analyzer_command: bytes):
90119
self.analyzer_command = analyzer_command

web/server/codechecker_server/metadata.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ def __init__(self, metadata_file_path):
6666
self.disabled_checkers: DisabledCheckers = set()
6767
self.checker_to_analyzer: CheckerToAnalyzer = {}
6868

69+
self.report_dir_path = None
70+
6971
self.__metadata_dict: Dict[str, Any] = {}
7072
if os.path.isfile(metadata_file_path):
7173
self.__metadata_dict = cast(Dict[str, Any],
@@ -184,6 +186,9 @@ def __process_metadata_info_v2(self):
184186
if tool['name'] == 'codechecker' and 'version' in tool:
185187
cc_versions.add(tool['version'])
186188

189+
if tool['name'] == 'codechecker':
190+
self.report_dir_path = tool.get('output_path')
191+
187192
if 'command' in tool:
188193
check_commands.add(' '.join(tool['command']))
189194

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
"""
2+
Add analysis_info_files table
3+
4+
Revision ID: 29e5047b6513
5+
Revises: 198654dac219
6+
Create Date: 2026-03-05 17:35:36.286847
7+
"""
8+
9+
from logging import getLogger
10+
11+
from alembic import op
12+
import sqlalchemy as sa
13+
14+
15+
# Revision identifiers, used by Alembic.
16+
revision = '29e5047b6513'
17+
down_revision = '198654dac219'
18+
branch_labels = None
19+
depends_on = None
20+
21+
22+
def upgrade():
    """
    Create the analysis_info_files table which links analysis_info
    records to stored configuration files (contents referenced by hash
    from file_contents).
    """
    # NOTE: the previously generated `LOG = getLogger(...)` local was
    # unused and has been removed.
    op.create_table(
        'analysis_info_files',
        sa.Column('analysis_info_id', sa.Integer(), nullable=False),
        sa.Column('filename', sa.String(), nullable=False),
        sa.Column('content_hash', sa.String(), nullable=False),
        sa.ForeignKeyConstraint(
            ['analysis_info_id'], ['analysis_info.id'],
            name=op.f(
                'fk_analysis_info_files_analysis_info_id_analysis_info'),
            ondelete='CASCADE', initially='DEFERRED', deferrable=True),
        sa.ForeignKeyConstraint(
            ['content_hash'], ['file_contents.content_hash'],
            name=op.f(
                'fk_analysis_info_files_content_hash_file_contents'),
            ondelete='CASCADE', initially='DEFERRED', deferrable=True),
        sa.PrimaryKeyConstraint(
            'analysis_info_id', 'content_hash',
            name=op.f('pk_analysis_info_files'))
    )
45+
46+
47+
def downgrade():
    """Drop the analysis_info_files table (reverts upgrade())."""
    # NOTE: the previously generated `LOG = getLogger(...)` local was
    # unused and has been removed.
    op.drop_table('analysis_info_files')
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
-*.txt

web/tests/functional/store/test_store.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,8 @@ def test_store_multiple_report_dirs(self):
264264
cfg['reportdir'] = report_dir1
265265
cfg['checkers'] = [
266266
'-d', 'core.DivideZero', '-e', 'deadcode.DeadStores']
267+
cfg['skip_file'] = os.path.join(self._divide_zero_workspace,
268+
'skipfile')
267269
codechecker.analyze(cfg, self._divide_zero_workspace)
268270

269271
with open(os.path.join(report_dir1, 'metadata.json'), 'r+',
@@ -278,6 +280,7 @@ def test_store_multiple_report_dirs(self):
278280
cfg['reportdir'] = report_dir2
279281
cfg['checkers'] = [
280282
'-e', 'core.DivideZero', '-d', 'deadcode.DeadStores']
283+
cfg.pop('skip_file')
281284
codechecker.analyze(cfg, self._divide_zero_workspace)
282285

283286
def store_multiple_report_dirs(report_dirs):
@@ -323,12 +326,28 @@ def store_multiple_report_dirs(report_dirs):
323326
analysis_info_filter = AnalysisInfoFilter(runId=report['runId'])
324327
analysis_info = self._cc_client.getAnalysisInfo(
325328
analysis_info_filter, limit, offset)
329+
326330
self.assertEqual(len(analysis_info), 2)
327331
self.assertTrue(
328332
any(report_dir1 in i.analyzerCommand for i in analysis_info))
329333
self.assertTrue(
330334
any(report_dir2 in i.analyzerCommand for i in analysis_info))
331335

336+
# Skip file content
337+
skip_file_info = "skip_file:\n-*.txt"
338+
339+
# During the analysis of report_dir1, we used a skipfile,
340+
# and the skipfile content should appear in the analyzer command.
341+
self.assertTrue(all(skip_file_info in
342+
i.analyzerCommand for i in analysis_info
343+
if report_dir1 in i.analyzerCommand))
344+
345+
# No skipfile was used during the analysis of report_dir2,
346+
# so we shouldn't see skipfile content in this case.
347+
self.assertFalse(any(skip_file_info in
348+
i.analyzerCommand for i in analysis_info
349+
if report_dir2 in i.analyzerCommand))
350+
332351
self.assertTrue(all(
333352
'<' not in i.analyzerCommand for i in analysis_info))
334353
self.assertTrue(any(

0 commit comments

Comments
 (0)