Skip to content

Commit 6183e73

Browse files
authored
Merge pull request #884 from juanjemdIos/master
author or maintainer of dockerfile. Fixes #725
2 parents f373fc9 + 533a88a commit 6183e73

6 files changed

Lines changed: 206 additions & 55 deletions

File tree

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import logging
2+
import os
3+
import re
4+
from ..utils import constants
5+
6+
def extract_dockerfile_maintainer(file_path):
    """Extract the maintainers/authors declared in a Dockerfile.

    The file content is searched with three regular expressions, in this
    order:

    1. ``constants.REGEXP_MAINTAINER_LABEL_OCI`` (OCI ``authors`` label),
    2. ``constants.REGEXP_MAINTAINER_LABEL_FREE`` (``LABEL maintainer=...``),
    3. ``constants.REGEXP_MAINTAINER`` (deprecated ``MAINTAINER`` instruction).

    Args:
        file_path: Path of the Dockerfile to inspect.

    Returns:
        A list with the distinct, whitespace-stripped matches, in the order
        in which they were first found. The list is empty when the file
        cannot be read, is not valid UTF-8, or declares no maintainer.
    """
    # Use logging rather than print so library code does not write to stdout.
    logging.info("Extracting maintainers from Dockerfile: %s", file_path)

    # Read raw bytes first so that an undecodable file can be told apart from
    # an unreadable one.
    try:
        with open(file_path, "rb") as file:
            raw_data = file.read()
    except OSError:
        logging.warning(f"Could not read Dockerfile {file_path}")
        return []

    try:
        content = raw_data.decode("utf-8")
    except UnicodeDecodeError:
        logging.warning(f"File {file_path} is not UTF-8 decodable. Skipping.")
        return []

    # NOTE(review): the caller stores these values under the "author"
    # property; whether a dedicated "maintainer" property would be a better
    # fit is still an open question.
    patterns = (
        constants.REGEXP_MAINTAINER_LABEL_OCI,
        constants.REGEXP_MAINTAINER_LABEL_FREE,
        constants.REGEXP_MAINTAINER,
    )
    flags = re.IGNORECASE | re.MULTILINE

    maintainers = []
    for pattern in patterns:
        maintainers.extend(re.findall(pattern, content, flags))

    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # result is deterministic across runs (a set would not be).
    stripped = (match.strip() for match in maintainers)
    return list(dict.fromkeys(name for name in stripped if name))

src/somef/process_files.py

Lines changed: 27 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from .parser.description_parser import parse_description_file
2222
from .parser.toml_parser import parse_toml_file
2323
from .parser.cabal_parser import parse_cabal_file
24+
from .parser.dockerfile_parser import extract_dockerfile_maintainer
2425
from chardet import detect
2526

2627

@@ -77,16 +78,34 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner
7778
repo_relative_path, filename)
7879
if filename == "Dockerfile":
7980
format_file = constants.FORMAT_DOCKERFILE
81+
maintainers = extract_dockerfile_maintainer(os.path.join(repo_dir, file_path))
8082
else:
8183
format_file = constants.FORMAT_DOCKER_COMPOSE
82-
metadata_result.add_result(constants.CAT_HAS_BUILD_FILE,
83-
{
84-
constants.PROP_VALUE: docker_url,
85-
constants.PROP_TYPE: constants.URL,
86-
constants.PROP_FORMAT: format_file
87-
},
88-
1,
89-
constants.TECHNIQUE_FILE_EXPLORATION, docker_url)
84+
maintainers = None
85+
86+
result_value = {
87+
constants.PROP_VALUE: docker_url,
88+
constants.PROP_TYPE: constants.URL,
89+
constants.PROP_FORMAT: format_file
90+
}
91+
if maintainers:
92+
result_value[constants.PROP_AUTHOR] = maintainers
93+
94+
metadata_result.add_result(
95+
constants.CAT_HAS_BUILD_FILE,
96+
result_value,
97+
1,
98+
constants.TECHNIQUE_FILE_EXPLORATION,
99+
docker_url
100+
)
101+
# metadata_result.add_result(constants.CAT_HAS_BUILD_FILE,
102+
# {
103+
# constants.PROP_VALUE: docker_url,
104+
# constants.PROP_TYPE: constants.URL,
105+
# constants.PROP_FORMAT: format_file
106+
# },
107+
# 1,
108+
# constants.TECHNIQUE_FILE_EXPLORATION, docker_url)
90109
if filename.lower().endswith(".ipynb"):
91110
notebook_url = get_file_link(repo_type, file_path, owner, repo_name, repo_default_branch, repo_dir,
92111
repo_relative_path, filename)
@@ -652,50 +671,3 @@ def clean_text(text):
652671
cleaned_lines.append(line)
653672
return "\n".join(cleaned_lines)
654673

655-
# """
656-
# Proccess a text with possible authors
657-
# """
658-
# if not author_str:
659-
# return []
660-
661-
# authors = []
662-
663-
# for line in author_str.splitlines():
664-
# line = line.strip()
665-
# if not line or line.startswith("#"):
666-
# continue
667-
668-
# email_match = re.search(r'<([^>]+)>', line)
669-
# if email_match:
670-
# email = email_match.group(1)
671-
# name = line[:email_match.start()].strip()
672-
# else:
673-
# name = line
674-
# email = None
675-
676-
# if name:
677-
# if re.search(constants.REGEXP_LTD_INC, name, re.IGNORECASE):
678-
# type_author = "Organization"
679-
# author_info = {
680-
# "name": name,
681-
# "email": email,
682-
# "value": name,
683-
# "type": type_author
684-
# }
685-
# else:
686-
# type_author = "Person"
687-
# name_parts = name.split()
688-
# given_name = name_parts[0] if name_parts else None
689-
# last_name = " ".join(name_parts[1:]) if len(name_parts) > 1 else None
690-
# author_info = {
691-
# "name": name,
692-
# "email": email,
693-
# "value": name,
694-
# "type": type_author,
695-
# "given_name": given_name,
696-
# "last_name": last_name
697-
# }
698-
699-
# authors.append(author_info)
700-
701-
# return authors

src/somef/test/test_JSON_export.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,5 +470,85 @@ def test_issue_859(self):
470470
os.remove(test_data_path + "test-859.json")
471471

472472

473+
def test_issue_725(self):
    """Check that authors are extracted from the Dockerfile of a repository.

    Runs SOMEF on the local ``Fairwinds`` test repository and verifies that
    ``"FairwindsOps, Inc."`` is among the ``author`` values attached to the
    ``has_build_file`` results of the JSON export.
    """
    output_path = test_data_path + "test_issue_725.json"
    try:
        somef_cli.run_cli(threshold=0.8,
                          ignore_classifiers=False,
                          repo_url=None,
                          local_repo=test_data_repositories + "Fairwinds",
                          doc_src=None,
                          in_file=None,
                          output=output_path,
                          graph_out=None,
                          graph_format="turtle",
                          codemeta_out=None,
                          pretty=True,
                          missing=False,
                          readme_only=False)

        # Context manager guarantees the handle is closed even if parsing fails.
        with open(output_path, "r") as text_file:
            json_content = json.load(text_file)

        # Collect every author attached to any detected build file.
        authors = []
        for entry in json_content.get("has_build_file", []):
            result = entry.get("result", {})
            if "author" in result:
                authors.extend(result["author"])

        expected_author = "FairwindsOps, Inc."

        assert expected_author in authors, (
            f"Expected author '{expected_author}' not found. "
            f"Authors found: {authors}"
        )
    finally:
        # Remove the generated export even when an assertion fails, so a
        # failing run does not leave artifacts in the test data folder.
        if os.path.exists(output_path):
            os.remove(output_path)
510+
511+
def test_issue_725_2(self):
    """Check that every distinct Dockerfile author of a repository is exported.

    Runs SOMEF on the local ``Prometeus`` test repository and verifies that
    ``"The Prometheus Authors"`` is among the ``author`` values attached to
    the ``has_build_file`` results, and that exactly two authors are found.
    """
    output_path = test_data_path + "test_issue_725_2.json"
    try:
        somef_cli.run_cli(threshold=0.8,
                          ignore_classifiers=False,
                          repo_url=None,
                          local_repo=test_data_repositories + "Prometeus",
                          doc_src=None,
                          in_file=None,
                          output=output_path,
                          graph_out=None,
                          graph_format="turtle",
                          codemeta_out=None,
                          pretty=True,
                          missing=False,
                          readme_only=False)

        # Context manager guarantees the handle is closed even if parsing fails.
        with open(output_path, "r") as text_file:
            json_content = json.load(text_file)

        # Collect every author attached to any detected build file.
        authors = []
        for entry in json_content.get("has_build_file", []):
            result = entry.get("result", {})
            if "author" in result:
                authors.extend(result["author"])

        expected_author = "The Prometheus Authors"

        assert expected_author in authors, (
            f"Expected author '{expected_author}' not found. "
            f"Authors found: {authors}"
        )
        expected_count = 2
        assert len(authors) == expected_count, (
            f"Expected {expected_count} authors, but found {len(authors)}: {authors}"
        )
    finally:
        # Remove the generated export even when an assertion fails, so a
        # failing run does not leave artifacts in the test data folder.
        if os.path.exists(output_path):
            os.remove(output_path)
552+
473553
if __name__ == '__main__':
474554
unittest.main()
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
FROM alpine:3.23
2+
3+
LABEL org.opencontainers.image.authors="FairwindsOps, Inc." \
4+
org.opencontainers.image.vendor="FairwindsOps, Inc." \
5+
org.opencontainers.image.title="Nova" \
6+
org.opencontainers.image.description="Nova is a cli tool to find outdated or deprecated Helm charts running in your Kubernetes cluster." \
7+
org.opencontainers.image.documentation="https://nova.docs.fairwinds.com/" \
8+
org.opencontainers.image.source="https://github.com/FairwindsOps/nova" \
9+
org.opencontainers.image.url="https://github.com/FairwindsOps/nova" \
10+
org.opencontainers.image.licenses="Apache License 2.0"
11+
12+
USER nobody
13+
COPY nova /
14+
CMD ["/nova"]
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
ARG ARCH="amd64"
2+
ARG OS="linux"
3+
FROM quay.io/prometheus/busybox-${OS}-${ARCH}:latest
4+
LABEL maintainer="The Prometheus Authors <prometheus-developers@googlegroups.com>"
5+
LABEL org.opencontainers.image.authors="The Prometheus Authors" \
6+
org.opencontainers.image.vendor="Prometheus" \
7+
org.opencontainers.image.title="Prometheus" \
8+
org.opencontainers.image.description="The Prometheus monitoring system and time series database" \
9+
org.opencontainers.image.source="https://github.com/prometheus/prometheus" \
10+
org.opencontainers.image.url="https://github.com/prometheus/prometheus" \
11+
org.opencontainers.image.documentation="https://prometheus.io/docs" \
12+
org.opencontainers.image.licenses="Apache License 2.0"
13+
14+
ARG ARCH="amd64"
15+
ARG OS="linux"
16+
COPY .build/${OS}-${ARCH}/prometheus /bin/prometheus
17+
COPY .build/${OS}-${ARCH}/promtool /bin/promtool
18+
COPY documentation/examples/prometheus.yml /etc/prometheus/prometheus.yml
19+
COPY LICENSE /LICENSE
20+
COPY NOTICE /NOTICE
21+
COPY npm_licenses.tar.bz2 /npm_licenses.tar.bz2
22+
23+
WORKDIR /prometheus
24+
RUN chown -R nobody:nobody /etc/prometheus /prometheus && chmod g+w /prometheus
25+
26+
USER nobody
27+
EXPOSE 9090
28+
VOLUME [ "/prometheus" ]
29+
ENTRYPOINT [ "/bin/prometheus" ]
30+
CMD [ "--config.file=/etc/prometheus/prometheus.yml", \
31+
"--storage.tsdb.path=/prometheus" ]

src/somef/utils/constants.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,3 +454,10 @@ class RepositoryType(Enum):
454454
CAT_CODEMETA_SOFTWAREREQUIREMENTS = "softwareRequirements"
455455
CAT_CODEMETA_SOFTWAREVERSION = "softwareVersion"
456456
CAT_CODEMETA_URL = "url"
457+
458+
459+
# Regular expressions that capture maintainer/author declarations in Dockerfiles
460+
# REGEXP_MAINTAINER_LABEL_OCI = r'^\s*LABEL\s+org\.opencontainers\.image\.authors\s*=\s*["\']?(.+?)["\']?\s*$'
461+
REGEXP_MAINTAINER_LABEL_OCI = r'^\s*LABEL\s+org\.opencontainers\.image\.authors\s*=\s*["\']([^"\'\\]+)["\']?\s*(?:\\)?\s*$'
462+
REGEXP_MAINTAINER_LABEL_FREE = r'^\s*LABEL\s+"?maintainer"?\s*=\s*["\']?(.+?)["\']?\s*$'
463+
REGEXP_MAINTAINER = r'^\s*MAINTAINER\s+(.+)$'

0 commit comments

Comments
 (0)