66from typing import Any
77
88from curl_cffi import requests
9- from curl_cffi . const import CurlHttpVersion
9+ import tls_client
1010
1111from funda .listing import Listing
1212
1818API_SEARCH = "https://listing-search-wonen.funda.io/_msearch/template"
1919API_WALTER = "https://api.walterliving.com/hunter/lookup"
2020
# Funda mobile app JA3 fingerprints (captured from real Dart/Flutter app traffic).
# A JA3 string is five comma-separated fields:
#   TLS version, cipher suites, extensions, elliptic curves, point formats.
# Both fingerprints share every field except the extension list, so the
# shared fields are spelled out once and joined below.
_JA3_TLS_VERSION = "771"
_JA3_CIPHERS = (
    "4867-4865-4866-52393-52392-49195-49199-49196-49200"
    "-49161-49171-49162-49172-156-157-47-53"
)
_JA3_EXTENSIONS = "0-23-65281-10-11-35-13-51-45-43"
_JA3_CURVES = "29-23-24"
_JA3_POINT_FORMATS = "0"

# JA3 without extension 21 - from favourites.funda.io
# JA3 hash: 9225d95490794840d9d5f1f94d339285
FUNDA_JA3 = ",".join((
    _JA3_TLS_VERSION,
    _JA3_CIPHERS,
    _JA3_EXTENSIONS,
    _JA3_CURVES,
    _JA3_POINT_FORMATS,
))

# JA3 with extension 21 (padding) appended - from cdn-settings.segment.com
# JA3 hash: 4bf8cdd8919b07d35ca824c20efb3537
FUNDA_JA3_EXT21 = ",".join((
    _JA3_TLS_VERSION,
    _JA3_CIPHERS,
    _JA3_EXTENSIONS + "-21",
    _JA3_CURVES,
    _JA3_POINT_FORMATS,
))

# Fingerprint pool - entries are tried in order until one works.
# Each entry is a dict whose "type" selects the HTTP backend:
#   "tls_ja3"          -> tls_client session with the custom JA3 in "ja3"
#   "curl_impersonate" -> curl_cffi session impersonating "target"
#   "tls_client"       -> tls_client session with preset profile "identifier"
FINGERPRINT_POOL = [
    # tls_client with the exact Funda app JA3 fingerprints
    *({"type": "tls_ja3", "ja3": ja3} for ja3 in (FUNDA_JA3, FUNDA_JA3_EXT21)),
    # curl_cffi impersonate fallback - Safari works best with Funda headers
    *(
        {"type": "curl_impersonate", "target": target}
        for target in ("safari15_5", "safari15_3")
    ),
    # tls_client preset profiles as final fallback
    *(
        {"type": "tls_client", "identifier": identifier}
        for identifier in ("okhttp4_android_13", "chrome_120")
    ),
]
2443
25- def _make_headers (host : str , for_search : bool = False ) -> list [tuple [str , str ]]:
26- """Generate headers matching the Funda Android app."""
44+ # Test endpoint to verify fingerprint works
45+ TEST_URL = f"{ API_BASE } /tinyId/43117443"
46+
47+
48+
49+ def _make_headers (for_search : bool = False ) -> list [tuple [str , str ]]:
50+ """Generate headers matching the Funda Android app exactly.
51+
52+ Header order and values are captured from real Funda app traffic.
53+ """
2754 trace_id = str (random .randint (10 ** 18 , 10 ** 19 ))
2855 parent_id = hex (random .randint (10 ** 15 , 10 ** 16 ))[2 :]
2956 tid = hex (int (time .time ()))[2 :] + "00000000"
3057
58+ # Base headers in exact order from app traffic
3159 headers = [
3260 ("user-agent" , "Dart/3.9 (dart:io)" ),
3361 ("x-datadog-sampling-priority" , "0" ),
@@ -38,25 +66,21 @@ def _make_headers(host: str, for_search: bool = False) -> list[tuple[str, str]]:
3866 ]
3967
4068 if for_search :
41- # Search endpoint uses referer and accept instead of x-funda-app-platform
69+ # Search endpoint: content-type, referer, accept, then traceparent
4270 headers .extend ([
4371 ("content-type" , "application/json" ),
4472 ("referer" , "https://www.funda.nl/" ),
4573 ("accept" , "application/json" ),
4674 ])
4775 else :
48- # Listing endpoint uses x-funda-app-platform
76+ # Listing endpoint: x-funda-app-platform, content-type, then traceparent
4977 headers .extend ([
5078 ("x-funda-app-platform" , "android" ),
5179 ("content-type" , "application/json" ),
5280 ])
5381
54- headers .extend ([
55- ("traceparent" , f"00-{ tid } { trace_id [:16 ]} -{ parent_id } -00" ),
56- ("host" , host ),
57- ("x-datadog-tags" , f"_dd.p.tid={ tid } " ),
58- ("x-datadog-trace-id" , trace_id ),
59- ])
82+ # traceparent is always last
83+ headers .append (("traceparent" , f"00-{ tid } { trace_id [:16 ]} -{ parent_id } -00" ))
6084
6185 return headers
6286
@@ -95,20 +119,145 @@ def __init__(self, timeout: int = 30):
95119 timeout: Request timeout in seconds
96120 """
97121 self .timeout = timeout
98- self ._session : requests .Session | None = None
122+ self ._curl_session : requests .Session | None = None
123+ self ._tls_session : tls_client .Session | None = None
124+ self ._fingerprint : dict | None = None
99125
100- @property
101- def session (self ) -> requests .Session :
102- """Lazily create HTTP session."""
103- if self ._session is None :
104- self ._session = requests .Session ()
105- return self ._session
def _make_headers_dict(self, for_search: bool = False) -> dict[str, str]:
    """Build the request headers as an insertion-ordered dict for tls_client.

    Args:
        for_search: When True, emit the search-endpoint variant
            (content-type, referer, accept); otherwise emit the
            listing-endpoint variant (x-funda-app-platform, content-type).

    Returns:
        A dict of header name to value. Fresh random trace identifiers are
        generated on every call, and ``traceparent`` is always the last key.
    """
    # Draw order matters for reproducibility under a seeded RNG:
    # trace id first, then parent id.
    trace_id = str(random.randint(10 ** 18, 10 ** 19))
    parent_id = f"{random.randint(10 ** 15, 10 ** 16):x}"
    # Current Unix time in hex, padded with eight zeros.
    tid = f"{int(time.time()):x}" + "00000000"

    if for_search:
        endpoint_headers = [
            ("content-type", "application/json"),
            ("referer", "https://www.funda.nl/"),
            ("accept", "application/json"),
        ]
    else:
        endpoint_headers = [
            ("x-funda-app-platform", "android"),
            ("content-type", "application/json"),
        ]

    # dict() keeps the pair order (Python 3.7+), which is the order the
    # headers are meant to be sent in.
    return dict([
        ("user-agent", "Dart/3.9 (dart:io)"),
        ("x-datadog-sampling-priority", "0"),
        ("x-datadog-origin", "rum"),
        ("tracestate", f"dd=s:0;o:rum;p:{parent_id}"),
        ("accept-encoding", "gzip"),
        ("x-datadog-parent-id", trace_id),
        *endpoint_headers,
        ("traceparent", f"00-{tid}{trace_id[:16]}-{parent_id}-00"),
    ])
160+
def _test_fingerprint(self, fingerprint: dict) -> bool:
    """Probe TEST_URL with one fingerprint and report whether it is accepted.

    A throwaway session is created for the probe and always closed
    afterwards, including when the request raises.

    Args:
        fingerprint: A FINGERPRINT_POOL-style entry. ``"type"`` selects the
            backend ("tls_ja3", "curl_ja3", "curl_impersonate" or
            "tls_client"); the remaining key ("ja3", "target" or
            "identifier") configures it.

    Returns:
        True only if the probe request returned HTTP 200. Unknown types,
        malformed entries and any exception raised while probing yield
        False.
    """
    session = None
    try:
        fp_type = fingerprint["type"]
        if fp_type == "tls_ja3":
            # tls_client with custom JA3 - primary method for Funda app fingerprint
            session = tls_client.Session(
                ja3_string=fingerprint["ja3"],
                random_tls_extension_order=False,
            )
            headers = self._make_headers_dict()
            response = session.get(TEST_URL, headers=headers, timeout_seconds=5)
        elif fp_type == "curl_ja3":
            session = requests.Session()
            headers = _make_headers()
            response = session.get(
                TEST_URL, headers=headers, ja3=fingerprint["ja3"], timeout=5
            )
        elif fp_type == "curl_impersonate":
            session = requests.Session(impersonate=fingerprint["target"])
            headers = _make_headers()
            response = session.get(TEST_URL, headers=headers, timeout=5)
        elif fp_type == "tls_client":
            session = tls_client.Session(
                client_identifier=fingerprint["identifier"],
                random_tls_extension_order=False,
            )
            headers = self._make_headers_dict()
            response = session.get(TEST_URL, headers=headers, timeout_seconds=5)
        else:
            return False
        return response.status_code == 200
    except Exception:
        # Deliberately broad: a probe that fails for any reason (TLS
        # rejection, timeout, malformed pool entry) just means "try the
        # next fingerprint".
        return False
    finally:
        # Release the probe session on every path. Older tls_client
        # releases may not expose close(), hence the getattr guard.
        closer = getattr(session, "close", None)
        if callable(closer):
            try:
                closer()
            except Exception:
                # Cleanup is best-effort; it must not change the verdict.
                pass
189+
def _find_working_fingerprint(self) -> dict:
    """Return the first FINGERPRINT_POOL entry that passes the probe.

    Entries are probed lazily in pool order, so probing stops at the first
    success.

    Raises:
        RuntimeError: If no entry in the pool passes ``_test_fingerprint``.
    """
    candidates = (entry for entry in FINGERPRINT_POOL if self._test_fingerprint(entry))
    chosen = next(candidates, None)
    if chosen is None:
        raise RuntimeError("No working fingerprint found. Funda may have updated their bot detection.")
    return chosen
196+
def _ensure_session(self) -> None:
    """Lazily select a fingerprint and create the matching HTTP session.

    On first use the working fingerprint is looked up via
    ``_find_working_fingerprint`` and cached on ``self._fingerprint``.
    The session for that fingerprint's backend is then created if it does
    not exist yet; an existing session is left untouched.
    """
    if self._fingerprint is None:
        self._fingerprint = self._find_working_fingerprint()

    active = self._fingerprint
    kind = active["type"]

    if kind in ("tls_ja3", "tls_client"):
        if self._tls_session is not None:
            return
        if kind == "tls_ja3":
            # tls_client with custom JA3 - exact Funda app fingerprint
            self._tls_session = tls_client.Session(
                ja3_string=active["ja3"],
                random_tls_extension_order=False,
            )
        else:
            self._tls_session = tls_client.Session(
                client_identifier=active["identifier"],
                random_tls_extension_order=False,
            )
    elif kind in ("curl_ja3", "curl_impersonate"):
        if self._curl_session is not None:
            return
        if kind == "curl_ja3":
            # The JA3 string is passed per request, not at session creation.
            self._curl_session = requests.Session()
        else:
            self._curl_session = requests.Session(impersonate=active["target"])
222+
def _get(self, url: str, headers_list: list[tuple[str, str]]) -> Any:
    """Send a GET request through whichever session backend is active.

    Args:
        url: Absolute URL to request.
        headers_list: Ordered ``(name, value)`` header pairs. curl_cffi
            sessions receive the list as-is. tls_client sessions receive
            the same pairs converted to an insertion-ordered dict, so the
            caller's headers are honoured on both backends. If the list is
            empty, the tls_client path falls back to
            ``_make_headers_dict()``.

    Returns:
        The response object returned by the underlying session.
    """
    self._ensure_session()
    fp_type = self._fingerprint["type"]

    if fp_type in ("tls_ja3", "tls_client"):
        # Previously this branch ignored headers_list and regenerated its
        # own headers; converting the pairs keeps both backends in sync.
        headers = dict(headers_list) if headers_list else self._make_headers_dict()
        return self._tls_session.get(url, headers=headers, timeout_seconds=self.timeout)
    if fp_type == "curl_ja3":
        # curl_cffi takes the JA3 string per request.
        return self._curl_session.get(
            url, headers=headers_list, ja3=self._fingerprint["ja3"], timeout=self.timeout
        )
    return self._curl_session.get(url, headers=headers_list, timeout=self.timeout)
235+
236+ def _post (self , url : str , headers_list : list [tuple [str , str ]], data : str = None , json_data : dict = None , for_search : bool = False ) -> Any :
237+ """Make POST request using the active session."""
238+ self ._ensure_session ()
239+ fp_type = self ._fingerprint ["type" ]
240+
241+ if fp_type in ("tls_ja3" , "tls_client" ):
242+ headers = self ._make_headers_dict (for_search = for_search )
243+ if json_data :
244+ return self ._tls_session .post (url , headers = headers , json = json_data , timeout_seconds = self .timeout )
245+ return self ._tls_session .post (url , headers = headers , data = data , timeout_seconds = self .timeout )
246+ elif fp_type == "curl_ja3" :
247+ if json_data :
248+ return self ._curl_session .post (url , headers = headers_list , json = json_data , ja3 = self ._fingerprint ["ja3" ], timeout = self .timeout )
249+ return self ._curl_session .post (url , headers = headers_list , data = data , ja3 = self ._fingerprint ["ja3" ], timeout = self .timeout )
250+ else :
251+ if json_data :
252+ return self ._curl_session .post (url , headers = headers_list , json = json_data , timeout = self .timeout )
253+ return self ._curl_session .post (url , headers = headers_list , data = data , timeout = self .timeout )
106254
def close(self) -> None:
    """Close any open HTTP sessions and forget them.

    Both the curl_cffi session and the tls_client session are closed (when
    present) and reset to None, so the next request recreates them. The
    cached fingerprint is kept, which avoids re-probing after a reopen.
    Calling this repeatedly is safe.
    """
    if self._curl_session:
        self._curl_session.close()
        self._curl_session = None

    if self._tls_session is not None:
        # Close the tls_client session rather than just dropping the
        # reference. Older tls_client releases may not expose close(),
        # hence the getattr guard.
        closer = getattr(self._tls_session, "close", None)
        if callable(closer):
            closer()
    self._tls_session = None
112261
113262 def __enter__ (self ) -> "Funda" :
114263 return self
@@ -142,26 +291,19 @@ def get_listing(self, listing_id: int | str) -> Listing:
142291
143292 # Try tinyId endpoint first (8-9 digits), then globalId (7 digits)
144293 listing_id_str = str (listing_id )
145- host = "listing-detail-page.funda.io"
146294 if len (listing_id_str ) >= 8 :
147295 url = API_LISTING_TINY .format (tiny_id = listing_id_str )
148296 else :
149297 url = API_LISTING .format (listing_id = listing_id_str )
150298
151- headers = _make_headers (host )
152- response = self .session .get (
153- url , headers = headers , ja3 = FUNDA_JA3 ,
154- http_version = CurlHttpVersion .V1_1 , timeout = self .timeout
155- )
299+ headers = _make_headers ()
300+ response = self ._get (url , headers )
156301
157302 # If tinyId fails, try as globalId
158303 if response .status_code == 404 and len (listing_id_str ) >= 8 :
159304 url = API_LISTING .format (listing_id = listing_id_str )
160- headers = _make_headers (host )
161- response = self .session .get (
162- url , headers = headers , ja3 = FUNDA_JA3 ,
163- http_version = CurlHttpVersion .V1_1 , timeout = self .timeout
164- )
305+ headers = _make_headers ()
306+ response = self ._get (url , headers )
165307
166308 if response .status_code != 200 :
167309 raise LookupError (f"Listing { listing_id } not found" )
@@ -299,17 +441,9 @@ def search_listing(
299441 query = f"{ index_line } \n { query_line } \n "
300442
301443 # Retry on intermittent 400 errors from API
302- host = "listing-search-wonen.funda.io"
303444 for attempt in range (3 ):
304- headers = _make_headers (host , for_search = True )
305- response = self .session .post (
306- API_SEARCH ,
307- headers = headers ,
308- data = query ,
309- ja3 = FUNDA_JA3 ,
310- http_version = CurlHttpVersion .V1_1 ,
311- timeout = self .timeout ,
312- )
445+ headers = _make_headers (for_search = True )
446+ response = self ._post (API_SEARCH , headers , data = query , for_search = True )
313447 if response .status_code == 200 :
314448 break
315449 if response .status_code == 400 and attempt < 2 :
@@ -502,16 +636,12 @@ def poll_new_listings(
502636 """
503637 consecutive_404s = 0
504638 current_id = since_id + 1
505- host = "listing-detail-page.funda.io"
506639
507640 while consecutive_404s < max_consecutive_404s :
508641 url = API_LISTING .format (listing_id = current_id )
509642 try :
510- headers = _make_headers (host )
511- response = self .session .get (
512- url , headers = headers , ja3 = FUNDA_JA3 ,
513- http_version = CurlHttpVersion .V1_1 , timeout = self .timeout
514- )
643+ headers = _make_headers ()
644+ response = self ._get (url , headers )
515645
516646 if response .status_code == 200 :
517647 consecutive_404s = 0
@@ -580,13 +710,11 @@ def get_price_history(self, listing: Listing | str) -> list[dict]:
580710 "zipcode" : postcode ,
581711 }
582712
583- response = self .session .post (
584- API_WALTER ,
585- json = payload ,
586- headers = {"Accept" : "application/json" , "Content-Type" : "application/json" },
587- timeout = self .timeout ,
588- http_version = CurlHttpVersion .V1_1 ,
589- )
713+ walter_headers = [
714+ ("Accept" , "application/json" ),
715+ ("Content-Type" , "application/json" ),
716+ ]
717+ response = self ._post (API_WALTER , walter_headers , json_data = payload )
590718
591719 if response .status_code != 200 :
592720 raise LookupError (f"Could not fetch price history (status { response .status_code } )" )
0 commit comments