|
| 1 | +import re |
1 | 2 | from warnings import warn |
2 | 3 |
|
3 | 4 | from py42.exceptions import Py42ChecksumNotFoundError |
@@ -136,76 +137,23 @@ def _stream_file(self, checksum, version_info): |
136 | 137 | raise Py42Error(f"No file with hash {checksum} available for download.") |
137 | 138 |
|
def _get_file_version_for_stream(self, device_guid, md5_hash, sha256_hash, path):
    """Look up the file version to stream for the given device, hashes and path.

    Queries the preservation data service's file version list and returns
    the value stored under the ``"match"`` key of the response data.

    Returns:
        The ``"match"`` entry of the response data, or ``None`` when the
        response data has no such key.
    """
    version_list = self._preservation_data_service.get_file_version_list(
        device_guid, md5_hash, sha256_hash, path
    )
    payload = version_list.data
    return payload.get("match")
179 | 144 |
|
def _get_file_stream(self, version):
    """Return the file stream for ``version``.

    Only versions that carry a ``downloadTokenRequest`` value can be
    downloaded; those are delegated to ``_get_exfiltrated_file``.

    Raises:
        Py42Error: If ``version`` has no (or an empty) ``downloadTokenRequest``.
    """
    # Guard clause: reject versions that cannot be downloaded first.
    if not version.get("downloadTokenRequest"):
        raise Py42Error(f"Unable to download file from version {version}")
    return self._get_exfiltrated_file(version)
185 | 150 |
|
def _get_exfiltrated_file(self, version):
    """Download the file described by ``version`` via its download-token URL.

    The ``downloadTokenRequest`` value is treated as a full URL. Its
    ``http(s)://host`` prefix is used to create the exfiltrated data
    service, and the complete URL is then handed to that service to obtain
    a download token.

    Args:
        version (dict): Version info containing a ``downloadTokenRequest`` URL.

    Returns:
        The result of the service's ``get_file`` call for the token text.

    Raises:
        Py42Error: If ``downloadTokenRequest`` is missing or does not start
            with an ``http://`` or ``https://`` host.
    """
    download_token_request = version.get("downloadTokenRequest")

    # re.match returns None when the string does not match (and rejects
    # non-strings), so validate before calling .group() instead of failing
    # with an opaque AttributeError/TypeError.
    url_match = None
    if isinstance(download_token_request, str):
        url_match = re.match(r"(https?://[^/]+)((/.*)|$)", download_token_request)
    if url_match is None:
        raise Py42Error(
            f"Invalid downloadTokenRequest URL in version {version}"
        )

    eds_url = url_match.group(1)
    eds = self._storage_service_factory.create_exfiltrated_data_service(eds_url)
    token_response = eds.get_download_token(download_token_request)
    return eds.get_file(token_response.text)
209 | 157 |
|
210 | 158 |
|
211 | 159 | def _parse_file_location_response(locations): |
|
0 commit comments