Skip to content

Commit 3cd690d

Browse files
authored
Vendor advocate (baserow#4704)
1 parent 3178e90 commit 3cd690d

File tree

27 files changed

+1763
-183
lines changed

27 files changed

+1763
-183
lines changed

backend/pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ dependencies = [
3535
"celery[redis]==5.6.2",
3636
"django-redis==6.0.0",
3737
"django-celery-email-reboot==4.2.0",
38-
"advocate==1.0.0",
3938
"zipp==3.23.0",
4039
"unicodecsv==0.14.1",
4140
"django-celery-beat==2.8.1",
@@ -103,6 +102,8 @@ dependencies = [
103102
"pyotp==2.9.0",
104103
"qrcode==8.2",
105104
"udspy==0.1.8",
105+
"netifaces==0.11.0",
106+
"requests-futures>=1.0.2",
106107
]
107108

108109
[project.urls]
@@ -142,7 +143,6 @@ dev = [
142143
"pytest-unordered==0.7.0",
143144
"debugpy==1.8.20",
144145
"backports.cached-property==1.0.2",
145-
"httpretty==1.1.4",
146146
"graphviz==0.21",
147147
"pytest-cov==7.0.0",
148148
"django-stubs==5.2.8",

backend/pytest.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ testpaths =
1212
tests
1313
../premium/backend/tests
1414
../enterprise/backend/tests
15+
src/advocate/test_advocate.py
1516
asyncio_default_fixture_loop_scope = function
1617
markers =
1718
field_text: All tests related to text field

backend/src/advocate/LICENSE

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
Copyright 2015 Jordan Milne
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.

backend/src/advocate/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
__version__ = "1.0.0"
2+
3+
from requests import utils # noqa: F401
4+
from requests.exceptions import (
5+
ConnectionError, # noqa: F401
6+
HTTPError, # noqa: F401
7+
RequestException, # noqa: F401
8+
Timeout, # noqa: F401
9+
TooManyRedirects, # noqa: F401
10+
URLRequired, # noqa: F401
11+
)
12+
from requests.models import PreparedRequest, Request, Response # noqa: F401
13+
from requests.status_codes import codes # noqa: F401
14+
15+
from .adapters import ValidatingHTTPAdapter # noqa: F401
16+
from .addrvalidator import AddrValidator # noqa: F401
17+
from .api import * # noqa: F403
18+
from .exceptions import UnacceptableAddressException # noqa: F401

backend/src/advocate/adapters.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from requests.adapters import DEFAULT_POOLBLOCK, HTTPAdapter
2+
3+
from .addrvalidator import AddrValidator
4+
from .exceptions import ProxyDisabledException
5+
from .poolmanager import ValidatingPoolManager
6+
7+
8+
class ValidatingHTTPAdapter(HTTPAdapter):
    """``HTTPAdapter`` whose pool manager is a ``ValidatingPoolManager``.

    The adapter carries an address validator (an ``AddrValidator`` unless
    the caller supplies one through the ``validator`` keyword) and hands it
    to the pool manager it creates. Proxies are refused outright.
    """

    # Same attribute list as the parent, extended with the validator.
    __attrs__ = [*HTTPAdapter.__attrs__, "_validator"]

    def __init__(self, *args, **kwargs):
        """Create the adapter.

        Args:
            *args: Forwarded unchanged to ``HTTPAdapter.__init__``.
            **kwargs: Forwarded to ``HTTPAdapter.__init__`` after the
                optional ``validator`` entry has been removed. A missing or
                falsy ``validator`` is replaced by a default
                ``AddrValidator()``.
        """
        supplied = kwargs.pop("validator", None)
        # Set before calling the parent initialiser.
        # NOTE(review): presumably the parent __init__ triggers
        # init_poolmanager, which reads self._validator - confirm against
        # the installed requests version.
        self._validator = supplied or AddrValidator()
        super().__init__(*args, **kwargs)

    def init_poolmanager(
        self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs
    ):
        """Build the validating pool manager and remember the pool settings.

        Args:
            connections: Stored as ``_pool_connections`` and passed on as
                ``num_pools``.
            maxsize: Stored as ``_pool_maxsize`` and passed on as ``maxsize``.
            block: Stored as ``_pool_block`` and passed on as ``block``.
            **pool_kwargs: Extra keyword arguments forwarded to
                ``ValidatingPoolManager``.
        """
        self._pool_connections, self._pool_maxsize = connections, maxsize
        self._pool_block = block

        self.poolmanager = ValidatingPoolManager(
            num_pools=connections,
            maxsize=maxsize,
            block=block,
            validator=self._validator,
            **pool_kwargs,
        )

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        """Always refuse: proxies are not supported by this adapter.

        Raises:
            ProxyDisabledException: Unconditionally.
        """
        raise ProxyDisabledException("Proxies cannot be used with Advocate")
Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
import fnmatch
2+
import functools
3+
import ipaddress
4+
import re
5+
6+
try:
7+
import netifaces
8+
9+
HAVE_NETIFACES = True
10+
except ImportError:
11+
netifaces = None
12+
HAVE_NETIFACES = False
13+
14+
from .exceptions import ConfigException, NameserverException
15+
16+
17+
def canonicalize_hostname(hostname):
    """Return ``hostname`` IDNA-encoded (punycode) and lowercased.

    Args:
        hostname: The hostname as a ``str``.

    Returns:
        The canonical form as a ``str``.
    """
    # Encode first, lowercase second: lowercasing the encoded bytes only
    # ever touches *ASCII* characters.
    # TODO: The differences between IDNA2003 and IDNA2008 might be relevant
    # to us, but both specs are hard to follow.
    punycoded = hostname.encode("idna")
    return punycoded.lower().decode("utf-8")
24+
25+
26+
def determine_local_addresses():
    """Get all IPs that refer to this machine according to netifaces.

    Returns:
        A list of ``ipaddress`` network objects, one per IPv4/IPv6 address
        reported for any interface.

    Raises:
        ConfigException: If the ``netifaces`` module could not be imported.
    """
    if not HAVE_NETIFACES:
        raise ConfigException(
            "Tried to determine local addresses, "
            "but netifaces module was not importable"
        )
    ips = []
    for interface in netifaces.interfaces():
        if_families = netifaces.ifaddresses(interface)
        for family_kind in (netifaces.AF_INET, netifaces.AF_INET6):
            for entry in if_families.get(family_kind, []):
                addr = entry.get("addr")
                if not addr:
                    # An entry without an address has nothing to block;
                    # passing "" to ip_network() would raise ValueError.
                    continue
                if family_kind == netifaces.AF_INET6:
                    # We can't do anything sensible with the scope here
                    addr = addr.split("%")[0]
                ips.append(ipaddress.ip_network(addr))
    return ips
44+
45+
46+
def add_local_address_arg(func):
    """Add the "_local_addresses" kwarg if it's missing.

    This information shouldn't be cached between calls (what if one of the
    adapters got a new IP at runtime?), and we don't want each function to
    recalculate it. Just recalculate it if the caller didn't provide it for
    us.

    An explicit ``_local_addresses=None`` is treated the same as an omitted
    argument, because ``None`` is the declared default of the wrapped
    methods and cannot be iterated by them.

    Args:
        func: A method whose instance exposes ``autodetect_local_addresses``
            and which accepts a ``_local_addresses`` keyword argument.

    Returns:
        The wrapped method.
    """

    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        if kwargs.get("_local_addresses") is None:
            if self.autodetect_local_addresses:
                kwargs["_local_addresses"] = determine_local_addresses()
            else:
                kwargs["_local_addresses"] = []
        return func(self, *args, **kwargs)

    return wrapper
64+
65+
66+
class AddrValidator:
    """Decide whether a destination IP, port and hostname may be contacted.

    The decision combines caller-supplied IP, port and hostname lists with
    built-in rejection of local, private, link-local, loopback, multicast,
    reserved and unspecified addresses.
    """

    _6TO4_RELAY_NET = ipaddress.ip_network("192.88.99.0/24")
    # Just the well-known prefix; DNS64 servers can set their own
    # prefix, but in practice most probably don't.
    _DNS64_WK_PREFIX = ipaddress.ip_network("64:ff9b::/96")
    DEFAULT_PORT_WHITELIST = {80, 8080, 443, 8443, 8000}

    def __init__(
        self,
        ip_blacklist=None,
        ip_whitelist=None,
        port_whitelist=None,
        port_blacklist=None,
        hostname_blacklist=None,
        allow_ipv6=False,
        allow_teredo=False,
        allow_6to4=False,
        allow_dns64=False,
        # Must be explicitly set to "False" if you don't want to try
        # detecting local interface addresses with netifaces.
        autodetect_local_addresses=True,
    ):
        """Configure the validator.

        Args:
            ip_blacklist: Networks whose addresses are rejected.
            ip_whitelist: Networks whose addresses are always accepted; takes
                precedence over ``ip_blacklist``.
            port_whitelist: If non-empty, only these ports are accepted.
                Defaults to a copy of ``DEFAULT_PORT_WHITELIST`` when neither
                port list is given.
            port_blacklist: Ports that are rejected.
            hostname_blacklist: Glob strings or regex patterns of hostnames
                that are rejected.
            allow_ipv6: Accept IPv6 addresses at all.
            allow_teredo: Accept Teredo addresses (nested IPv4 addresses are
                still checked).
            allow_6to4: Accept 6to4 addresses (nested IPv4 address is still
                checked).
            allow_dns64: Accept addresses in the well-known DNS64 prefix
                (nested IPv4 address is still checked).
            autodetect_local_addresses: Look up this machine's interface
                addresses and reject them.
        """
        if not port_blacklist and not port_whitelist:
            # An assortment of common HTTP(S) ports.
            port_whitelist = self.DEFAULT_PORT_WHITELIST.copy()
        self.ip_blacklist = ip_blacklist or set()
        self.ip_whitelist = ip_whitelist or set()
        self.port_blacklist = port_blacklist or set()
        self.port_whitelist = port_whitelist or set()
        # TODO: ATM this can contain either regexes or globs that are converted
        # to regexes upon every check. Create a collection that automagically
        # converts them to regexes on insert?
        self.hostname_blacklist = hostname_blacklist or set()
        self.allow_ipv6 = allow_ipv6
        self.allow_teredo = allow_teredo
        self.allow_6to4 = allow_6to4
        self.allow_dns64 = allow_dns64
        self.autodetect_local_addresses = autodetect_local_addresses

    @add_local_address_arg
    def is_ip_allowed(self, addr_ip, _local_addresses=None):
        """Return whether connecting to ``addr_ip`` is acceptable.

        Args:
            addr_ip: An ``ipaddress`` address object, or any value accepted
                by ``ipaddress.ip_address``.
            _local_addresses: Networks describing this machine's own
                addresses; supplied by the decorator when omitted.

        Returns:
            ``True`` if the address passes every check, ``False`` otherwise.

        Raises:
            ValueError: If the address has an unsupported IP version.
        """
        if not isinstance(addr_ip, (ipaddress.IPv4Address, ipaddress.IPv6Address)):
            addr_ip = ipaddress.ip_address(addr_ip)

        # The whitelist should take precedence over the blacklist so we can
        # punch holes in blacklisted ranges
        if any(addr_ip in net for net in self.ip_whitelist):
            return True

        if any(addr_ip in net for net in self.ip_blacklist):
            return False

        if any(addr_ip in net for net in _local_addresses):
            return False

        if addr_ip.version == 4:
            if not addr_ip.is_private:
                # IPs for carrier-grade NAT. Seems weird that it doesn't set
                # `is_private`, but we need to check `not is_global`
                if not ipaddress.ip_network(addr_ip).is_global:
                    return False
        elif addr_ip.version == 6:
            # You'd better have a good reason for enabling IPv6
            # because Advocate's techniques don't work well without NAT.
            if not self.allow_ipv6:
                return False

            # v6 addresses can also map to IPv4 addresses! Tricky!
            v4_nested = []
            if addr_ip.ipv4_mapped:
                v4_nested.append(addr_ip.ipv4_mapped)
            # IPv6 has several tunneling mechanisms that embed IPv4 addresses.
            # XXX: Do we even really care about these? If we're tunneling
            # through public servers we shouldn't be able to access
            # addresses on our private network, right?
            if addr_ip.sixtofour:
                if not self.allow_6to4:
                    return False
                v4_nested.append(addr_ip.sixtofour)
            if addr_ip.teredo:
                if not self.allow_teredo:
                    return False
                # Check both the client *and* server IPs
                v4_nested.extend(addr_ip.teredo)
            if addr_ip in self._DNS64_WK_PREFIX:
                if not self.allow_dns64:
                    return False
                # When using the well-known prefix the last 4 bytes
                # are the IPv4 addr
                v4_nested.append(ipaddress.ip_address(addr_ip.packed[-4:]))

            # Forward the local address list we already have so nested checks
            # don't trigger a fresh interface lookup per embedded address.
            if not all(
                self.is_ip_allowed(addr_v4, _local_addresses=_local_addresses)
                for addr_v4 in v4_nested
            ):
                return False

            # fec0::*, apparently deprecated?
            if addr_ip.is_site_local:
                return False
        else:
            raise ValueError("Unsupported IP version(?): %r" % addr_ip)

        # 169.254.XXX.XXX, AWS uses these for autoconfiguration
        if addr_ip.is_link_local:
            return False
        # 127.0.0.1, ::1, etc.
        if addr_ip.is_loopback:
            return False
        if addr_ip.is_multicast:
            return False
        # 192.168.XXX.XXX, 10.XXX.XXX.XXX
        if addr_ip.is_private:
            return False
        # 255.255.255.255, ::ffff:XXXX:XXXX (v6->v4) mapping
        if addr_ip.is_reserved:
            return False
        # There's no reason to connect directly to a 6to4 relay
        if addr_ip in self._6TO4_RELAY_NET:
            return False
        # 0.0.0.0
        if addr_ip.is_unspecified:
            return False

        # It doesn't look bad, so... it must be ok!
        return True

    def _hostname_matches_pattern(self, hostname, pattern):
        """Return whether ``hostname`` matches ``pattern``.

        Args:
            hostname: Hostname to test; canonicalized before matching.
            pattern: A glob given as ``str``, or a regex pattern usable with
                ``re.match``.

        Returns:
            ``True`` if the hostname, or its prefix up to the first NUL
            character, matches the pattern.
        """
        # If they specified a string, just assume they only want basic globbing.
        # This stops people from not realizing they're dealing in REs and
        # not escaping their periods unless they specifically pass in an RE.
        # This has the added benefit of letting us sanely handle globbed
        # IDNs by default.
        if isinstance(pattern, str):
            # convert the glob to a punycode glob, then a regex
            pattern = fnmatch.translate(canonicalize_hostname(pattern))

        hostname = canonicalize_hostname(hostname)
        # Down the line the hostname may get treated as a null-terminated string
        # (as with `socket.getaddrinfo`.) Try to account for that.
        #
        #    >>> socket.getaddrinfo("example.com\x00aaaa", 80)
        #    [(2, 1, 6, '', ('93.184.216.34', 80)), [...]
        no_null_hostname = hostname.split("\x00")[0]

        return any(
            re.match(pattern, x.strip(".")) for x in (no_null_hostname, hostname)
        )

    def is_hostname_allowed(self, hostname):
        """Return ``False`` if ``hostname`` matches any blacklist pattern."""
        # Sometimes (like with "external" services that your IP has privileged
        # access to) you might not always know the IP range to blacklist access
        # to, or the `A` record might change without you noticing.
        # For example: `foocorp.external.org`.
        #
        # Another option is doing something like:
        #
        #     for addrinfo in socket.getaddrinfo("foocorp.external.org", 80):
        #         global_validator.ip_blacklist.add(ip_address(addrinfo[4][0]))
        #
        # but that's not always a good idea if they're behind a third-party lb.
        for pattern in self.hostname_blacklist:
            if self._hostname_matches_pattern(hostname, pattern):
                return False
        return True

    @add_local_address_arg
    def is_addrinfo_allowed(self, addrinfo, _local_addresses=None):
        """Return whether a ``socket.getaddrinfo``-style record is acceptable.

        Args:
            addrinfo: A 5-tuple ``(family, socktype, proto, canonname,
                sockaddr)``.
            _local_addresses: Networks describing this machine's own
                addresses; supplied by the decorator when omitted.

        Returns:
            ``True`` if the port, hostname and IP all pass their checks.

        Raises:
            ValueError: If ``sockaddr`` has neither two nor four items.
            NameserverException: If a hostname blacklist is configured but
                the record carries no canonical name.
        """
        assert len(addrinfo) == 5
        # XXX: Do we care about any of the other elements? Guessing not.
        _family, _socktype, _proto, canonname, sockaddr = addrinfo

        # The last element of addrinfo (sockaddr) may either be a tuple of
        # two or four items, depending on whether we're dealing with IPv4
        # or v6
        if len(sockaddr) == 2:
            # v4
            ip, port = sockaddr
        elif len(sockaddr) == 4:
            # v6
            # XXX: what *are* `flow_info` and `scope_id`? Anything useful?
            # Seems like we can figure out all we need about the scope from
            # the `is_<x>` properties.
            ip, port, _flow_info, _scope_id = sockaddr
        else:
            raise ValueError("Unexpected addrinfo format %r" % sockaddr)

        # Probably won't help protect against SSRF, but might prevent our being
        # used to attack others' non-HTTP services. See
        # http://www.remote.org/jochen/sec/hfpa/
        if self.port_whitelist and port not in self.port_whitelist:
            return False
        if port in self.port_blacklist:
            return False

        if self.hostname_blacklist:
            if not canonname:
                raise NameserverException(
                    "addrinfo must contain the canon name to do blacklisting "
                    "based on hostname. Make sure you use the "
                    "`socket.AI_CANONNAME` flag, and that each record contains "
                    "the canon name. Your DNS server might also be garbage."
                )

            if not self.is_hostname_allowed(canonname):
                return False

        return self.is_ip_allowed(ip, _local_addresses=_local_addresses)

0 commit comments

Comments
 (0)