@@ -31,6 +31,18 @@ def __str__(self):
3131 )
3232
3333
34+ def url_looks_like_patch (url ):
35+ return "/nocfbot" not in url and re .match (
36+ r"https://.*\.(diff|patch)(\.gz|\.bz2)?$" , url
37+ )
38+
39+
40+ def url_looks_like_patch_tarball (url ):
41+ return "/nocfbot" not in url and re .match (
42+ r"https://.*\.(tar|tgz|tar\.gz|tar\.bz2|zip)$" , url
43+ )
44+
45+
3446def get_latest_patches_from_thread_url (thread_url ):
3547 """Given a 'whole thread' URL from the archives, find the last message that
3648 had at least one attachment called something.patch. Return the message
@@ -41,34 +53,52 @@ def get_latest_patches_from_thread_url(thread_url):
4153 message_id = None
4254 for line in cfbot_util .slow_fetch (thread_url ).splitlines ():
4355 groups = re .search (
44- '<a href="(/message-id/attachment/[^"]*\\ .(diff|diff \\ .gz|patch|patch \\ .gz|tar \\ .gz|tgz|tar \\ .bz2|zip) )">' ,
56+ '<a href="(/message-id/attachment/[^"]*)">' ,
4557 line ,
4658 )
4759 if groups :
4860 attachment = groups .group (1 )
49- if "/nocfbot" not in attachment and not attachment .endswith (
50- "subtrans-benchmark.tar.gz"
51- ):
52- message_attachments .append ("https://www.postgresql.org" + attachment )
61+ url = "https://www.postgres.org" + attachment
62+ if url_looks_like_patch (url ) or url_looks_like_patch_tarball (url ):
63+ message_attachments .append (url )
5364 selected_message_attachments = message_attachments
5465 selected_message_id = message_id
5566
56- # groups = re.search('<a name="([^"]+)"></a>', line)
67+ # start of a new message?
5768 groups = re .search ('<td><a href="/message-id/[^"]+">([^"]+)</a></td>' , line )
5869 if groups :
5970 message_id = groups .group (1 )
6071 message_attachments = []
61- # if there is a tarball attachment, there must be only one attachment,
62- # otherwise give up on this thread (we don't know how to combine patches and
63- # tarballs)
72+
6473 if selected_message_attachments is not None :
6574 if any (
66- x .endswith (".tgz" ) or x .endswith (".tar.gz" ) or x .endswith (".tar.bz2" )
67- for x in selected_message_attachments
75+ url_looks_like_patch_tarball (url ) for url in selected_message_attachments
6876 ):
69- if len (selected_message_attachments ) > 1 :
77+ # there is a tarball. we don't actually know if it contains any
78+ # patches (rather than, say, benchmark results). this is stupid,
79+ # but we'll try to guess...
80+ #
81+ # XXX the basic problem here is that we can't peek into the
82+ # tarballs and see if they contain patches, which is a bit sad;
83+ # perhaps we should just take everything, and teach the patch
84+ # burner script to examine everything and fail with a special
85+ # result code for 'nothing to do here' if it can't find any
86+ # patches? the point of that would be to avoid running any code
87+ # that downloads and unpacks stuff outside the container, since we
88+ # don't really have enough information here but also don't want to
89+ # touch untrusted data here
90+ if any (url_looks_like_patch (url ) for url in selected_message_attachments ):
91+ # mixture of tarballs and patches, keep only the patches (not
92+ # great as it would be nice to be able to post a tarball + an
93+ # extra plain patch)
94+ selected_message_attachments = list (
95+ filter (url_looks_like_patch , selected_message_attachments )
96+ )
97+ elif len (selected_message_attachments ) > 1 :
98+ # tarball-only, multi-tarball messages not currently supported
7099 selected_message_id = None
71100 selected_message_attachments = None
101+
72102 # if there are multiple patch files, they had better follow the convention
73103 # of leading numbers, otherwise we don't know how to apply them in the right
74104 # order
@@ -172,10 +202,9 @@ def get_current_commitfests():
172202
173203
174204if __name__ == "__main__" :
175- # for name, cf in get_current_commitfests().items():
176- # if not cf:
177- # continue
178- for sub in get_submissions_for_commitfest (55 ):
179- print (str (sub ))
180- # print get_thread_url_for_submission(19, 1787)
181- # print(get_latest_patches_from_thread_url(get_thread_url_for_submission(37, 2901)))
205+ # test case
206+ print (
207+ get_latest_patches_from_thread_url (
208+ "https://www.postgresql.org/message-id/flat/CAApHDvrF6DG7=xD8JGo2HoQKN0LRFNF0ysVt6cKSNPiqbdQOSA@mail.gmail.com"
209+ )
210+ )
0 commit comments