Skip to content

Commit 1ba5fcd

Browse files
download files from EDS instead of PDS (#478)
* download files from EDS instead of PDS * remove integration test * fix file download flow * version bump
1 parent a9f47d7 commit 1ba5fcd

10 files changed

Lines changed: 59 additions & 631 deletions

File tree

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88
The intended audience of this file is py42 consumers -- as such, changes that don't affect
99
how a consumer would use the library (e.g. adding unit tests, updating documentation, etc.) are not captured here.
1010

11+
## 1.29.0 - 2025-05-06
12+
13+
### Fixed
14+
15+
- An issue where file download methods did not work under some conditions.
16+
17+
### Changed
18+
19+
- The internal method `ExfiltratedDataService.get_download_token` now takes only one parameter, `downloadRequestUrl`. This is the full URL (including query parameters) that will be used to request the download token.
20+
1121
## 1.28.0 - 2025-03-21
1222

1323
### Deprecated

src/py42/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# py42
22

3-
__version__ = "1.28.2"
3+
__version__ = "1.29.0"

src/py42/clients/securitydata.py

Lines changed: 9 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
from warnings import warn
23

34
from py42.exceptions import Py42ChecksumNotFoundError
@@ -136,76 +137,23 @@ def _stream_file(self, checksum, version_info):
136137
raise Py42Error(f"No file with hash {checksum} available for download.")
137138

138139
def _get_file_version_for_stream(self, device_guid, md5_hash, sha256_hash, path):
139-
version = self._get_device_file_version(
140-
device_guid, md5_hash, sha256_hash, path
141-
)
142-
if not version:
143-
version = self._get_other_file_location_version(md5_hash, sha256_hash)
144-
return version
145-
146-
def _get_device_file_version(self, device_guid, md5_hash, sha256_hash, path):
147140
response = self._preservation_data_service.get_file_version_list(
148141
device_guid, md5_hash, sha256_hash, path
149142
)
150-
versions = (
151-
response.data.get("securityEventVersionsMatchingChecksum")
152-
or response.data.get("securityEventVersionsAtPath")
153-
or response.data.get("preservationVersions")
154-
)
155-
156-
if versions:
157-
if not response.data.get("securityEventVersionsAtPath"):
158-
exact_match = _get_first_matching_version(versions, md5_hash)
159-
if exact_match:
160-
return exact_match
161-
162-
most_recent = sorted(
163-
versions, key=lambda i: i["versionTimestamp"], reverse=True
164-
)
165-
return most_recent[0]
166-
167-
def _get_other_file_location_version(self, md5_hash, sha256_hash):
168-
response = self._file_event_service.get_file_location_detail_by_sha256(
169-
sha256_hash
170-
)
171-
locations = response["locations"]
172-
if locations:
173-
paths = _parse_file_location_response(locations)
174-
version = self._preservation_data_service.find_file_version(
175-
md5_hash, sha256_hash, paths
176-
)
177-
if version.status_code == 200:
178-
return version.data
143+
return response.data.get("match")
179144

180145
def _get_file_stream(self, version):
181-
if version.get("edsUrl"):
146+
if version.get("downloadTokenRequest"):
182147
return self._get_exfiltrated_file(version)
183148

184-
return self._get_stored_file(version)
149+
raise Py42Error(f"Unable to download file from version {version}")
185150

186151
def _get_exfiltrated_file(self, version):
187-
eds = self._storage_service_factory.create_exfiltrated_data_service(
188-
version["edsUrl"]
189-
)
190-
token = eds.get_download_token(
191-
version["eventId"],
192-
version["deviceUid"],
193-
version["filePath"],
194-
version["fileSHA256"],
195-
version["versionTimestamp"],
196-
)
197-
return eds.get_file(str(token))
198-
199-
def _get_stored_file(self, version):
200-
pds = self._storage_service_factory.create_preservation_data_service(
201-
version["storageNodeURL"]
202-
)
203-
token = pds.get_download_token(
204-
version["archiveGuid"],
205-
version["fileId"],
206-
version["versionTimestamp"],
207-
)
208-
return pds.get_file(str(token))
152+
downloadTokenRequest = version.get("downloadTokenRequest")
153+
edsUrl = re.match(r"(https?://[^/]+)((/.*)|$)", downloadTokenRequest).group(1)
154+
eds = self._storage_service_factory.create_exfiltrated_data_service(edsUrl)
155+
token_response = eds.get_download_token(downloadTokenRequest)
156+
return eds.get_file(token_response.text)
209157

210158

211159
def _parse_file_location_response(locations):

src/py42/sdk/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ def _init_services(main_connection, main_auth, auth_flag=None):
333333
alert_rules_key = "FedObserver-API_URL"
334334
alerts_key = "AlertService-API_URL"
335335
file_events_key = "FORENSIC_SEARCH-API_URL"
336-
preservation_data_key = "PRESERVATION-DATA-SERVICE_API-URL"
336+
preservation_data_key = "EXFILTRATED-DATA-SERVICE_API-URL"
337337
kv_prefix = "simple-key-value-store"
338338
audit_logs_key = "AUDIT-LOG_API-URL"
339339
cases_key = "CASES_API-URL"

src/py42/services/preservationdata.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,8 @@
44

55

66
class PreservationDataService(BaseService):
7-
def find_file_version(self, file_md5, file_sha256, paths):
8-
"""Fetch file version details.
9-
10-
Args:
11-
file_md5 (str): MD5 encoded hash of the file.
12-
file_sha256 (str): SHA256 encoded hash of the file.
13-
paths (str): File path with filename to fetch.
14-
15-
Returns:
16-
:class:`py42.response.Py42Response`
17-
"""
18-
19-
data = {"fileSHA256": file_sha256, "fileMD5": file_md5, "devicePaths": paths}
20-
uri = "/api/v1/FindAvailableVersion"
21-
return self._connection.post(uri, json=data)
22-
237
def get_file_version_list(self, device_id, file_md5, file_sha256, path):
248
params = "fileSHA256={}&fileMD5={}&deviceUid={}&filePath={}"
259
params = params.format(file_sha256, file_md5, device_id, quote(path))
26-
uri = f"/api/v2/file-version-listing?{params}"
10+
uri = f"/api/v3/search-file?{params}"
2711
return self._connection.get(uri)
Lines changed: 7 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,25 @@
1-
from urllib.parse import quote
2-
31
from py42.services import BaseService
42

53

64
class ExfiltratedDataService(BaseService):
75

86
_base_uri = "api/v1/"
97

10-
def __init__(self, main_session, streaming_session):
11-
super().__init__(main_session)
8+
def __init__(self, main_connection, streaming_session):
9+
super().__init__(main_connection)
1210
self._streaming_session = streaming_session
1311

14-
def get_download_token(
15-
self, event_id, device_id, file_path, file_sha256, timestamp
16-
):
12+
def get_download_token(self, downloadRequestUrl):
1713
"""Get EDS download token for a file.
1814
1915
Args:
20-
event_id (str): Id of the file event that references the file desired for download.
21-
device_id (str): Id of the device on which the file desired for download is stored.
22-
file_path (str): Path where the file desired for download resides on the device.
23-
timestamp (int): Last updated timestamp of the file in milliseconds.
16+
downloadRequestUrl (str): The download request url to get the token
2417
2518
Returns:
2619
:class:`py42.response.Py42Response`: A response containing download token for the file.
2720
"""
28-
params = "deviceUid={}&eventId={}&filePath={}&fileSHA256={}&versionTimestamp={}"
29-
params = params.format(
30-
device_id, event_id, quote(file_path), file_sha256, timestamp
31-
)
32-
resource = "file-download-token"
3321
headers = {"Accept": "*/*"}
34-
uri = f"{self._base_uri}{resource}?{params}"
22+
uri = f"{downloadRequestUrl}"
3523
return self._connection.get(uri, headers=headers)
3624

3725
def get_file(self, token):
@@ -43,10 +31,6 @@ def get_file(self, token):
4331
Returns:
4432
Returns a stream of the file indicated by the input token.
4533
"""
46-
resource = "get-file"
47-
uri = f"{self._connection.host_address}/{self._base_uri}{resource}"
48-
params = {"token": token}
34+
uri = f"{token}"
4935
headers = {"Accept": "*/*"}
50-
return self._streaming_session.get(
51-
uri, params=params, headers=headers, stream=True
52-
)
36+
return self._streaming_session.get(uri, headers=headers, stream=True)

0 commit comments

Comments
 (0)