Skip to content
This repository was archived by the owner on Apr 22, 2026. It is now read-only.

Commit ab259d6

Browse files
committed
Add subpackage for github fetching
1 parent 7650d0c commit ab259d6

3 files changed

Lines changed: 351 additions & 0 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from gitlab2prov.adapters.hub.fetcher import GithubFetcher
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
import logging
2+
import itertools
3+
from typing import Iterator
4+
from dataclasses import dataclass, field, InitVar
5+
6+
from github import Github
7+
from github.Repository import Repository
8+
9+
from gitlab2prov.adapters.project_url import GithubProjectUrl
10+
from gitlab2prov.adapters.hub.parser import GithubAnnotationParser
11+
from gitlab2prov.domain.constants import ProvRole
12+
from gitlab2prov.domain.objects import (
13+
Asset,
14+
User,
15+
Commit,
16+
Issue,
17+
MergeRequest,
18+
GitTag,
19+
Release,
20+
)
21+
22+
23+
# Module-level logger named after this module.
log = logging.getLogger(__name__)
24+
25+
26+
@dataclass
class GithubFetcher:
    """Fetch the resources of one GitHub repository as domain objects.

    The fetcher is constructed from an access ``token`` and a project
    ``url``; both are init-only and are consumed by ``__post_init__`` to
    build the API client and resolve the repository.  Every ``fetch_*``
    method is a generator that performs API requests lazily while it is
    being iterated.
    """

    token: InitVar[str]
    url: InitVar[str]

    # ``default_factory`` gives each fetcher its own parser.  A plain instance
    # default is rejected by ``dataclasses`` on Python 3.11+ because the parser
    # is itself an (unhashable) dataclass, and would be shared between all
    # fetchers on older versions.
    parser: GithubAnnotationParser = field(default_factory=GithubAnnotationParser)
    client: Github = field(init=False)
    repository: Repository = field(init=False)

    def __post_init__(self, token: str, url: str) -> None:
        """Create the API client and look up the repository addressed by *url*."""
        self.client = Github(login_or_token=token, per_page=100)
        self.repository = self.client.get_repo(full_name_or_id=GithubProjectUrl(url).slug)
        log.warning(f"Remaining requests: {self.client.rate_limiting[0]}")

    @staticmethod
    def _reactions_of(comments: list) -> Iterator:
        """Yield every single reaction attached to any of *comments*.

        ``get_reactions()`` returns one collection per comment; the collections
        are flattened here so that callers receive individual reactions that
        the annotation parser can recognise.
        """
        return itertools.chain.from_iterable(
            comment.get_reactions() for comment in comments
        )

    def fetch_all(self) -> Iterator[Commit | Issue | MergeRequest | Release | GitTag]:
        """Yield all commits, issues, merge requests, releases and tags, in that order."""
        yield from itertools.chain(
            self.fetch_commits(),
            self.fetch_issues(),
            self.fetch_mergerequests(),
            self.fetch_releases(),
            self.fetch_tags(),
        )

    def fetch_commits(self) -> Iterator[Commit]:
        """Yield one ``Commit`` per repository commit, annotated with its
        statuses, comments and the reactions to those comments."""
        for commit in self.repository.get_commits():
            # Fetch the comments once; they are needed for themselves and
            # for looking up their reactions.
            comments = list(commit.get_comments())
            raw_annotations = [
                *commit.get_statuses(),
                *comments,
                *self._reactions_of(comments),
            ]
            yield Commit(
                sha=commit.sha,
                url=commit.url,
                author=User(
                    commit.commit.author.name,
                    commit.commit.author.email,
                    prov_role=ProvRole.COMMIT_AUTHOR,
                ),
                platform="github",
                annotations=self.parser.parse(raw_annotations),
                authored_at=commit.commit.author.date,
                committed_at=commit.commit.committer.date,
            )

    def fetch_issues(self) -> Iterator[Issue]:
        """Yield one ``Issue`` per repository issue (open and closed),
        annotated with comments, reactions, events and timeline entries."""
        # NOTE(review): GitHub's issue listing is documented to include pull
        # requests as well; confirm whether those should be skipped here since
        # fetch_mergerequests() already covers them.
        for issue in self.repository.get_issues(state="all"):
            comments = list(issue.get_comments())
            raw_annotations = [
                *comments,
                *issue.get_reactions(),
                *self._reactions_of(comments),
                *issue.get_events(),
                *issue.get_timeline(),
            ]
            yield Issue(
                # NOTE(review): ``number`` is mapped to ``id`` and the global
                # ``id`` to ``iid`` -- confirm this is the intended mapping.
                id=issue.number,
                iid=issue.id,
                platform="github",
                title=issue.title,
                body=issue.body,
                url=issue.url,
                author=User(issue.user.name, issue.user.email, prov_role=ProvRole.ISSUE_AUTHOR),
                annotations=self.parser.parse(raw_annotations),
                created_at=issue.created_at,
                closed_at=issue.closed_at,
            )

    def fetch_mergerequests(self) -> Iterator[MergeRequest]:
        """Yield one ``MergeRequest`` per pull request (open and closed),
        annotated with comments, reviews, reactions, events and timeline entries."""
        for pull in self.repository.get_pulls(state="all"):
            # ``as_issue()`` is resolved once and reused for all issue-level data.
            issue = pull.as_issue()
            # PyGithub's ``get_comments()`` is an alias for the review comments,
            # so the conversation comments have to be requested explicitly.
            conversation = list(pull.get_issue_comments())
            review_comments = list(pull.get_review_comments())
            raw_annotations = [
                *conversation,
                *self._reactions_of(conversation),
                *review_comments,
                *self._reactions_of(review_comments),
                *pull.get_reviews(),
                *issue.get_reactions(),
                *issue.get_events(),
                *issue.get_timeline(),
            ]

            yield MergeRequest(
                # NOTE(review): same ``number``/``id`` mapping as in
                # fetch_issues() -- confirm it is intended.
                id=pull.number,
                iid=pull.id,
                title=pull.title,
                body=pull.body,
                url=pull.url,
                platform="github",
                # ``head`` is the branch carrying the changes (source),
                # ``base`` is the branch they are merged into (target).
                source_branch=pull.head.ref,
                target_branch=pull.base.ref,
                author=User(
                    name=pull.user.name,
                    email=pull.user.email,
                    prov_role=ProvRole.MERGE_REQUEST_AUTHOR,
                ),
                annotations=self.parser.parse(raw_annotations),
                created_at=pull.created_at,
                closed_at=pull.closed_at,
                merged_at=pull.merged_at,
            )

    def fetch_releases(self) -> Iterator[Release]:
        """Yield one ``Release`` per repository release, including its assets."""
        for release in self.repository.get_releases():
            yield Release(
                name=release.title,
                body=release.body,
                tag_name=release.tag_name,
                platform="github",
                author=User(
                    name=release.author.name,
                    email=release.author.email,
                    prov_role=ProvRole.RELEASE_AUTHOR,
                ),
                assets=[
                    Asset(url=asset.url, format=asset.content_type)
                    for asset in release.get_assets()
                ],
                evidences=[],
                created_at=release.created_at,
                released_at=release.published_at,
            )

    def fetch_tags(self) -> Iterator[GitTag]:
        """Yield one ``GitTag`` per repository tag, described by the tagged commit."""
        for tag in self.repository.get_tags():
            # Use the git author recorded in the commit (as fetch_commits()
            # does).  ``tag.commit.author`` is the linked GitHub account and is
            # missing whenever the commit e-mail belongs to no account.
            git_author = tag.commit.commit.author
            yield GitTag(
                name=tag.name,
                sha=tag.commit.sha,
                message=tag.commit.commit.message,
                author=User(
                    name=git_author.name,
                    email=git_author.email,
                    prov_role=ProvRole.TAG_AUTHOR,
                ),
                created_at=git_author.date,
            )

gitlab2prov/adapters/hub/parser.py

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
import logging
2+
from dataclasses import dataclass
3+
from typing import TypeVar, Callable
4+
5+
from github.CommitComment import CommitComment
6+
from github.CommitStatus import CommitStatus
7+
from github.Reaction import Reaction
8+
from github.IssueComment import IssueComment
9+
from github.IssueEvent import IssueEvent
10+
from github.TimelineEvent import TimelineEvent
11+
from github.PullRequestComment import PullRequestComment
12+
from github.PullRequestReview import PullRequestReview
13+
14+
from gitlab2prov.domain.objects import Annotation, User
15+
from gitlab2prov.domain.constants import ProvRole
16+
17+
# Type variable standing for the type of a raw (not yet parsed) annotation object.
A = TypeVar("A")
18+
19+
# Module-level logger named after this module.
log = logging.getLogger(__name__)
20+
21+
22+
@dataclass
23+
class GithubAnnotationParser:
24+
@staticmethod
25+
def sort_by_date(annotations: list[Annotation]) -> list[Annotation]:
26+
return list(sorted(annotations, key=lambda a: a.start))
27+
28+
def choose_parser(self, raw_annotation: A) -> Callable[[A], Annotation]:
29+
match raw_annotation:
30+
case CommitComment():
31+
return self.parse_commit_comment
32+
case CommitStatus():
33+
return self.parse_commit_status
34+
case Reaction():
35+
return self.parse_reaction
36+
case IssueComment():
37+
return self.parse_issue_comment
38+
case IssueEvent():
39+
return self.parse_issue_event
40+
case TimelineEvent():
41+
return self.parse_timeline_event
42+
case PullRequestReview():
43+
return self.parse_pull_request_review
44+
case PullRequestComment():
45+
return self.parse_pull_request_comment
46+
case _:
47+
log.warning(f"no parser found for {raw_annotation=}")
48+
49+
def parse(self, annotations: list[A]) -> list[Annotation]:
50+
parsed_annotations = []
51+
for annotation in annotations:
52+
if parser := self.choose_parser(annotation):
53+
parsed_annotations.append(parser(annotation))
54+
return self.sort_by_date(parsed_annotations)
55+
56+
def parse_commit_comment(self, comment: CommitComment) -> Annotation:
57+
annotator = User(
58+
name=comment.user.name,
59+
email=comment.user.email,
60+
github_username=comment.user.login,
61+
github_id=comment.user.id,
62+
prov_role=ProvRole.ANNOTATOR,
63+
)
64+
return Annotation(
65+
uid=comment.id,
66+
name="add_comment",
67+
body=comment.body,
68+
start=comment.created_at,
69+
end=comment.created_at,
70+
annotator=annotator,
71+
)
72+
73+
def parse_commit_status(self, status: CommitStatus) -> Annotation:
74+
annotator = User(
75+
name=status.creator.name,
76+
email=status.creator.email,
77+
github_username=status.creator.login,
78+
github_id=status.creator.id,
79+
prov_role=ProvRole.ANNOTATOR,
80+
)
81+
return Annotation(
82+
uid=status.id,
83+
name="add_commit_status",
84+
body=status.description,
85+
start=status.created_at,
86+
end=status.created_at,
87+
annotator=annotator,
88+
)
89+
90+
def parse_reaction(self, reaction: Reaction) -> Annotation:
91+
annotator = User(
92+
name=reaction.user.name,
93+
email=reaction.user.email,
94+
github_username=reaction.user.login,
95+
github_id=reaction.user.id,
96+
prov_role=ProvRole.ANNOTATOR,
97+
)
98+
return Annotation(
99+
uid=reaction.id,
100+
name="add_award",
101+
body=reaction.content,
102+
start=reaction.created_at,
103+
end=reaction.created_at,
104+
annotator=annotator,
105+
)
106+
107+
108+
def parse_issue_comment(self, comment: IssueComment) -> Annotation:
109+
annotator = User(
110+
name=comment.user.name,
111+
email=comment.user.email,
112+
github_username=comment.user.login,
113+
github_id=comment.user.id,
114+
prov_role=ProvRole.ANNOTATOR,
115+
)
116+
return Annotation(
117+
uid=comment.id,
118+
name="add_comment",
119+
body=comment.body,
120+
start=comment.created_at,
121+
end=comment.created_at,
122+
annotator=annotator,
123+
)
124+
125+
def parse_issue_event(self, event: IssueEvent) -> Annotation:
126+
annotator = User(
127+
name=event.actor.name,
128+
email=event.actor.email,
129+
github_username=event.actor.login,
130+
github_id=event.actor.id,
131+
prov_role=ProvRole.ANNOTATOR,
132+
)
133+
return Annotation(
134+
uid=event.id,
135+
name=event.event,
136+
body=event.event,
137+
start=event.created_at,
138+
end=event.created_at,
139+
annotator=annotator,
140+
)
141+
142+
def parse_timeline_event(self, event: TimelineEvent) -> Annotation:
143+
annotator = User(
144+
name=event.actor.name,
145+
email=event.actor.email,
146+
github_username=event.actor.login,
147+
github_id=event.actor.id,
148+
prov_role=ProvRole.ANNOTATOR,
149+
)
150+
return Annotation(
151+
uid=event.id,
152+
name=event.event,
153+
body=event.event,
154+
start=event.created_at,
155+
end=event.created_at,
156+
annotator=annotator,
157+
)
158+
159+
def parse_pull_request_review(self, review: PullRequestReview) -> Annotation:
160+
annotator = User(
161+
name=review.user.name,
162+
email=review.user.email,
163+
github_username=review.user.login,
164+
github_id=review.user.id,
165+
prov_role=ProvRole.ANNOTATOR,
166+
)
167+
return Annotation(
168+
uid=review.id,
169+
name="add_review",
170+
body=review.body,
171+
start=review.submitted_at,
172+
end=review.submitted_at,
173+
annotator=annotator,
174+
)
175+
176+
def parse_pull_request_comment(self, comment: PullRequestComment) -> Annotation:
177+
annotator = User(
178+
name=comment.user.name,
179+
email=comment.user.email,
180+
github_username=comment.user.login,
181+
github_id=comment.user.id,
182+
prov_role=ProvRole.ANNOTATOR,
183+
)
184+
return Annotation(
185+
uid=comment.id,
186+
name="add_comment",
187+
body=comment.body,
188+
start=comment.created_at,
189+
end=comment.created_at,
190+
annotator=annotator,
191+
)

0 commit comments

Comments
 (0)