Skip to content

Commit 93cd463

Browse files
committed
Replicate exact Funda mobile app TLS fingerprint
- Add tls_client library for custom JA3 fingerprint support
- Use exact JA3 fingerprints captured from real Funda app traffic
  - FUNDA_JA3: fingerprint without ext 21 (from favourites.funda.io)
  - FUNDA_JA3_EXT21: fingerprint with ext 21 (from segment.com)
- Add fingerprint pool with automatic fallback selection
- Match exact header order from app traffic captures
- Remove extra headers not present in real app (x-datadog-tags, etc.)
- Bump version to 2.3.0
1 parent 07107cb commit 93cd463

2 files changed

Lines changed: 187 additions & 57 deletions

File tree

funda/funda.py

Lines changed: 184 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from typing import Any
77

88
from curl_cffi import requests
9-
from curl_cffi.const import CurlHttpVersion
9+
import tls_client
1010

1111
from funda.listing import Listing
1212

@@ -18,16 +18,44 @@
1818
API_SEARCH = "https://listing-search-wonen.funda.io/_msearch/template"
1919
API_WALTER = "https://api.walterliving.com/hunter/lookup"
2020

21-
FUNDA_JA3 = "771,4867-4865-4866-52393-52392-49195-49199-49196-49200-49161-49171-49162-49172-156-157-47-53,0-23-65281-10-11-35-13-51-45-43-21,29-23-24,0"
21+
# Funda mobile app JA3 fingerprints (captured from real Dart/Flutter app traffic)
22+
# JA3 without extension 21 - from favourites.funda.io
23+
# JA3 hash: 9225d95490794840d9d5f1f94d339285
24+
FUNDA_JA3 = "771,4867-4865-4866-52393-52392-49195-49199-49196-49200-49161-49171-49162-49172-156-157-47-53,0-23-65281-10-11-35-13-51-45-43,29-23-24,0"
2225

26+
# JA3 with extension 21 (padding) - from cdn-settings.segment.com
27+
# JA3 hash: 4bf8cdd8919b07d35ca824c20efb3537
28+
FUNDA_JA3_EXT21 = "771,4867-4865-4866-52393-52392-49195-49199-49196-49200-49161-49171-49162-49172-156-157-47-53,0-23-65281-10-11-35-13-51-45-43-21,29-23-24,0"
2329

30+
# Fingerprint pool - tried in order until one works
31+
# Types: "tls_ja3", "curl_impersonate", "tls_client" (the client also handles "curl_ja3", which is not used in this pool)
32+
FINGERPRINT_POOL = [
33+
# tls_client with exact Funda app JA3 fingerprints
34+
{"type": "tls_ja3", "ja3": FUNDA_JA3},
35+
{"type": "tls_ja3", "ja3": FUNDA_JA3_EXT21},
36+
# curl_cffi impersonate fallback - Safari works best with Funda headers
37+
{"type": "curl_impersonate", "target": "safari15_5"},
38+
{"type": "curl_impersonate", "target": "safari15_3"},
39+
# tls_client preset profiles as final fallback
40+
{"type": "tls_client", "identifier": "okhttp4_android_13"},
41+
{"type": "tls_client", "identifier": "chrome_120"},
42+
]
2443

25-
def _make_headers(host: str, for_search: bool = False) -> list[tuple[str, str]]:
26-
"""Generate headers matching the Funda Android app."""
44+
# Test endpoint to verify fingerprint works
45+
TEST_URL = f"{API_BASE}/tinyId/43117443"
46+
47+
48+
49+
def _make_headers(for_search: bool = False) -> list[tuple[str, str]]:
50+
"""Generate headers matching the Funda Android app exactly.
51+
52+
Header order and values are captured from real Funda app traffic.
53+
"""
2754
trace_id = str(random.randint(10**18, 10**19))
2855
parent_id = hex(random.randint(10**15, 10**16))[2:]
2956
tid = hex(int(time.time()))[2:] + "00000000"
3057

58+
# Base headers in exact order from app traffic
3159
headers = [
3260
("user-agent", "Dart/3.9 (dart:io)"),
3361
("x-datadog-sampling-priority", "0"),
@@ -38,25 +66,21 @@ def _make_headers(host: str, for_search: bool = False) -> list[tuple[str, str]]:
3866
]
3967

4068
if for_search:
41-
# Search endpoint uses referer and accept instead of x-funda-app-platform
69+
# Search endpoint: content-type, referer, accept, then traceparent
4270
headers.extend([
4371
("content-type", "application/json"),
4472
("referer", "https://www.funda.nl/"),
4573
("accept", "application/json"),
4674
])
4775
else:
48-
# Listing endpoint uses x-funda-app-platform
76+
# Listing endpoint: x-funda-app-platform, content-type, then traceparent
4977
headers.extend([
5078
("x-funda-app-platform", "android"),
5179
("content-type", "application/json"),
5280
])
5381

54-
headers.extend([
55-
("traceparent", f"00-{tid}{trace_id[:16]}-{parent_id}-00"),
56-
("host", host),
57-
("x-datadog-tags", f"_dd.p.tid={tid}"),
58-
("x-datadog-trace-id", trace_id),
59-
])
82+
# traceparent is always last
83+
headers.append(("traceparent", f"00-{tid}{trace_id[:16]}-{parent_id}-00"))
6084

6185
return headers
6286

@@ -95,20 +119,145 @@ def __init__(self, timeout: int = 30):
95119
timeout: Request timeout in seconds
96120
"""
97121
self.timeout = timeout
98-
self._session: requests.Session | None = None
122+
self._curl_session: requests.Session | None = None
123+
self._tls_session: tls_client.Session | None = None
124+
self._fingerprint: dict | None = None
99125

100-
@property
101-
def session(self) -> requests.Session:
102-
"""Lazily create HTTP session."""
103-
if self._session is None:
104-
self._session = requests.Session()
105-
return self._session
126+
def _make_headers_dict(self, for_search: bool = False) -> dict[str, str]:
127+
"""Generate headers as dict for tls_client.
128+
129+
Header order and values match the real Funda Android app exactly.
130+
"""
131+
trace_id = str(random.randint(10**18, 10**19))
132+
parent_id = hex(random.randint(10**15, 10**16))[2:]
133+
tid = hex(int(time.time()))[2:] + "00000000"
134+
135+
# Build headers in exact order from app traffic
136+
# Python 3.7+ dicts preserve insertion order
137+
headers = {
138+
"user-agent": "Dart/3.9 (dart:io)",
139+
"x-datadog-sampling-priority": "0",
140+
"x-datadog-origin": "rum",
141+
"tracestate": f"dd=s:0;o:rum;p:{parent_id}",
142+
"accept-encoding": "gzip",
143+
"x-datadog-parent-id": trace_id,
144+
}
145+
146+
if for_search:
147+
# Search endpoint: content-type, referer, accept, then traceparent
148+
headers["content-type"] = "application/json"
149+
headers["referer"] = "https://www.funda.nl/"
150+
headers["accept"] = "application/json"
151+
else:
152+
# Listing endpoint: x-funda-app-platform, content-type, then traceparent
153+
headers["x-funda-app-platform"] = "android"
154+
headers["content-type"] = "application/json"
155+
156+
# traceparent is always last
157+
headers["traceparent"] = f"00-{tid}{trace_id[:16]}-{parent_id}-00"
158+
159+
return headers
160+
161+
def _test_fingerprint(self, fingerprint: dict) -> bool:
162+
"""Test if a fingerprint works against Funda API."""
163+
try:
164+
fp_type = fingerprint["type"]
165+
if fp_type == "tls_ja3":
166+
# tls_client with custom JA3 - primary method for Funda app fingerprint
167+
session = tls_client.Session(ja3_string=fingerprint["ja3"], random_tls_extension_order=False)
168+
headers = self._make_headers_dict()
169+
response = session.get(TEST_URL, headers=headers, timeout_seconds=5)
170+
elif fp_type == "curl_ja3":
171+
session = requests.Session()
172+
headers = _make_headers()
173+
response = session.get(TEST_URL, headers=headers, ja3=fingerprint["ja3"], timeout=5)
174+
session.close()
175+
elif fp_type == "curl_impersonate":
176+
session = requests.Session(impersonate=fingerprint["target"])
177+
headers = _make_headers()
178+
response = session.get(TEST_URL, headers=headers, timeout=5)
179+
session.close()
180+
elif fp_type == "tls_client":
181+
session = tls_client.Session(client_identifier=fingerprint["identifier"], random_tls_extension_order=False)
182+
headers = self._make_headers_dict()
183+
response = session.get(TEST_URL, headers=headers, timeout_seconds=5)
184+
else:
185+
return False
186+
return response.status_code == 200
187+
except Exception:
188+
return False
189+
190+
def _find_working_fingerprint(self) -> dict:
    """Return the first FINGERPRINT_POOL entry that passes the probe.

    Entries are probed in pool order and probing stops at the first success.

    Raises:
        RuntimeError: If no entry in the pool passes.
    """
    winner = next(
        (candidate for candidate in FINGERPRINT_POOL if self._test_fingerprint(candidate)),
        None,
    )
    if winner is None:
        raise RuntimeError("No working fingerprint found. Funda may have updated their bot detection.")
    return winner
196+
197+
def _ensure_session(self) -> None:
    """Select a fingerprint if none is chosen yet and lazily build its session.

    tls_client-backed types ("tls_ja3", "tls_client") populate
    ``self._tls_session``; curl_cffi-backed types ("curl_ja3",
    "curl_impersonate") populate ``self._curl_session``. An existing
    session is left untouched.
    """
    if self._fingerprint is None:
        self._fingerprint = self._find_working_fingerprint()

    chosen = self._fingerprint
    kind = chosen["type"]

    if kind in ("tls_ja3", "tls_client"):
        if self._tls_session is not None:
            return
        if kind == "tls_ja3":
            # Custom JA3 string - exact Funda app fingerprint.
            self._tls_session = tls_client.Session(
                ja3_string=chosen["ja3"],
                random_tls_extension_order=False,
            )
        else:
            # Preset tls_client profile.
            self._tls_session = tls_client.Session(
                client_identifier=chosen["identifier"],
                random_tls_extension_order=False,
            )
        return

    if kind in ("curl_ja3", "curl_impersonate"):
        if self._curl_session is not None:
            return
        if kind == "curl_ja3":
            # The JA3 string is passed per request, not at session creation.
            self._curl_session = requests.Session()
        else:
            self._curl_session = requests.Session(impersonate=chosen["target"])
222+
223+
def _get(self, url: str, headers_list: list[tuple[str, str]]) -> Any:
    """Issue a GET through whichever session the active fingerprint uses.

    Args:
        url: Absolute URL to fetch.
        headers_list: Ordered (name, value) header pairs. They are sent
            as-is on the curl_cffi paths and converted to an
            insertion-ordered dict on the tls_client paths.

    Returns:
        The response object of the underlying HTTP library.
    """
    self._ensure_session()
    fp_type = self._fingerprint["type"]

    if fp_type in ("tls_ja3", "tls_client"):
        # Honour the caller's headers instead of silently regenerating them,
        # so both backends send the same header set. Fall back to generated
        # headers only when the caller supplied none.
        headers = dict(headers_list) if headers_list else self._make_headers_dict()
        return self._tls_session.get(url, headers=headers, timeout_seconds=self.timeout)
    if fp_type == "curl_ja3":
        return self._curl_session.get(
            url,
            headers=headers_list,
            ja3=self._fingerprint["ja3"],
            timeout=self.timeout,
        )
    return self._curl_session.get(url, headers=headers_list, timeout=self.timeout)
235+
236+
def _post(
    self,
    url: str,
    headers_list: list[tuple[str, str]],
    data: str | None = None,
    json_data: dict | None = None,
    for_search: bool = False,
) -> Any:
    """Issue a POST through whichever session the active fingerprint uses.

    Args:
        url: Absolute URL to post to.
        headers_list: Ordered (name, value) header pairs. They are sent
            as-is on the curl_cffi paths and converted to an
            insertion-ordered dict on the tls_client paths.
        data: Raw request body; used when ``json_data`` is None.
        json_data: JSON-serialisable body; takes precedence over ``data``
            whenever it is not None (an empty dict is still sent as JSON).
        for_search: Only consulted when ``headers_list`` is empty, to pick
            the generated header variant on the tls_client paths.

    Returns:
        The response object of the underlying HTTP library.
    """
    self._ensure_session()
    fp_type = self._fingerprint["type"]

    # Exactly one body argument is forwarded to the HTTP library.
    body = {"json": json_data} if json_data is not None else {"data": data}

    if fp_type in ("tls_ja3", "tls_client"):
        # Honour the caller's headers (for example the plain JSON headers
        # used for the Walter API) instead of always sending the generated
        # Funda app headers.
        if headers_list:
            headers = dict(headers_list)
        else:
            headers = self._make_headers_dict(for_search=for_search)
        return self._tls_session.post(url, headers=headers, timeout_seconds=self.timeout, **body)

    # curl_cffi paths: the custom JA3 string is supplied per request.
    extra = {"ja3": self._fingerprint["ja3"]} if fp_type == "curl_ja3" else {}
    return self._curl_session.post(url, headers=headers_list, timeout=self.timeout, **body, **extra)
106254

107255
def close(self) -> None:
    """Close any open HTTP session and drop the references to it.

    Safe to call repeatedly; sessions that were never created are skipped.
    """
    if self._curl_session:
        self._curl_session.close()
        self._curl_session = None
    if self._tls_session is not None:
        # Release the tls_client session too instead of only dropping the
        # reference. The lookup is guarded in case the installed session
        # class does not expose close().
        tls_close = getattr(self._tls_session, "close", None)
        if callable(tls_close):
            tls_close()
        self._tls_session = None
112261

113262
def __enter__(self) -> "Funda":
114263
return self
@@ -142,26 +291,19 @@ def get_listing(self, listing_id: int | str) -> Listing:
142291

143292
# Try tinyId endpoint first (8-9 digits), then globalId (7 digits)
144293
listing_id_str = str(listing_id)
145-
host = "listing-detail-page.funda.io"
146294
if len(listing_id_str) >= 8:
147295
url = API_LISTING_TINY.format(tiny_id=listing_id_str)
148296
else:
149297
url = API_LISTING.format(listing_id=listing_id_str)
150298

151-
headers = _make_headers(host)
152-
response = self.session.get(
153-
url, headers=headers, ja3=FUNDA_JA3,
154-
http_version=CurlHttpVersion.V1_1, timeout=self.timeout
155-
)
299+
headers = _make_headers()
300+
response = self._get(url, headers)
156301

157302
# If tinyId fails, try as globalId
158303
if response.status_code == 404 and len(listing_id_str) >= 8:
159304
url = API_LISTING.format(listing_id=listing_id_str)
160-
headers = _make_headers(host)
161-
response = self.session.get(
162-
url, headers=headers, ja3=FUNDA_JA3,
163-
http_version=CurlHttpVersion.V1_1, timeout=self.timeout
164-
)
305+
headers = _make_headers()
306+
response = self._get(url, headers)
165307

166308
if response.status_code != 200:
167309
raise LookupError(f"Listing {listing_id} not found")
@@ -299,17 +441,9 @@ def search_listing(
299441
query = f"{index_line}\n{query_line}\n"
300442

301443
# Retry on intermittent 400 errors from API
302-
host = "listing-search-wonen.funda.io"
303444
for attempt in range(3):
304-
headers = _make_headers(host, for_search=True)
305-
response = self.session.post(
306-
API_SEARCH,
307-
headers=headers,
308-
data=query,
309-
ja3=FUNDA_JA3,
310-
http_version=CurlHttpVersion.V1_1,
311-
timeout=self.timeout,
312-
)
445+
headers = _make_headers(for_search=True)
446+
response = self._post(API_SEARCH, headers, data=query, for_search=True)
313447
if response.status_code == 200:
314448
break
315449
if response.status_code == 400 and attempt < 2:
@@ -502,16 +636,12 @@ def poll_new_listings(
502636
"""
503637
consecutive_404s = 0
504638
current_id = since_id + 1
505-
host = "listing-detail-page.funda.io"
506639

507640
while consecutive_404s < max_consecutive_404s:
508641
url = API_LISTING.format(listing_id=current_id)
509642
try:
510-
headers = _make_headers(host)
511-
response = self.session.get(
512-
url, headers=headers, ja3=FUNDA_JA3,
513-
http_version=CurlHttpVersion.V1_1, timeout=self.timeout
514-
)
643+
headers = _make_headers()
644+
response = self._get(url, headers)
515645

516646
if response.status_code == 200:
517647
consecutive_404s = 0
@@ -580,13 +710,11 @@ def get_price_history(self, listing: Listing | str) -> list[dict]:
580710
"zipcode": postcode,
581711
}
582712

583-
response = self.session.post(
584-
API_WALTER,
585-
json=payload,
586-
headers={"Accept": "application/json", "Content-Type": "application/json"},
587-
timeout=self.timeout,
588-
http_version=CurlHttpVersion.V1_1,
589-
)
713+
walter_headers = [
714+
("Accept", "application/json"),
715+
("Content-Type", "application/json"),
716+
]
717+
response = self._post(API_WALTER, walter_headers, json_data=payload)
590718

591719
if response.status_code != 200:
592720
raise LookupError(f"Could not fetch price history (status {response.status_code})")

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "pyfunda"
7-
version = "2.2.1"
7+
version = "2.3.0"
88
description = "Python API for Funda.nl real estate listings"
99
readme = "README.md"
1010
license = "AGPL-3.0-or-later"
@@ -24,6 +24,8 @@ classifiers = [
2424
]
2525
dependencies = [
2626
"curl-cffi>=0.14.0",
27+
"tls-client>=1.0.1",
28+
"typing_extensions>=4.0.0",
2729
]
2830

2931
[project.urls]

0 commit comments

Comments
 (0)