Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hca/dss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ def download_bundle(self, bundle_uuid, version="", metadata_filter=('*',), data_

for file_ in manifest['bundle']['files']:
dss_file = DSSFile.from_dss_bundle_response(file_, self.replica)
filename = file_.get("name", dss_file.uuid)
filename = file_.get("name", dss_file.uuid).replace('!', '/')
walking_dir = bundle_dir

globs = metadata_filter if file_['indexed'] else data_filter
Expand Down
38 changes: 36 additions & 2 deletions test/integration/dss/test_dss_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,18 @@
from test import reset_tweak_changes, TEST_DIR


def get_uploaded_file_names(path: str):
files = []
for entry in os.scandir(path=path):
if entry.is_file():
files.append(entry.name)
if entry.is_dir():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if entry.is_dir():
elif entry.is_dir():

and else: assert False at the end.

nested_files = get_uploaded_file_names(entry.path)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is building an excessive number of temporary strings and lists during recursion. Think about how you can make this more efficient. Also check if there isn't built-in functionality for performing a recursive listing of files.

for file in nested_files:
files.append('{}/{}'.format(entry.name, file))
return files


class TestDssApi(unittest.TestCase):
staging_bucket = "org-humancellatlas-dss-cli-test"

Expand Down Expand Up @@ -71,9 +83,31 @@ def test_python_nested_bundle_upload_download(self):
downloaded_file_paths = [object_name_builder(p, dest_dir) for p in downloaded_file_names]
self.assertEqual(uploaded_files.sort(), downloaded_file_paths.sort())

@unittest.skipIf(os.name is 'nt', 'Unable to test on Windows') # TODO windows testing refactor
def test_python_exclamation_replacement(self):
bundle_path = os.path.join(TEST_DIR, "res", "nested_bundle")
uploaded_files = set(get_uploaded_file_names(bundle_path))

bundle_output = self.client.upload(src_dir=bundle_path,
replica="aws",
staging_bucket=self.staging_bucket)

bundle_uuid = bundle_output['bundle_uuid']
bundle_version = bundle_output['version']
bundle_fqid = "{}.{}".format(bundle_uuid, bundle_version)

manifest_files = bundle_output['files']
self.assertEqual({file['name'] for file in manifest_files}, uploaded_files)

with tempfile.TemporaryDirectory() as dest_dir:
self.client.download(bundle_uuid=bundle_output['bundle_uuid'], replica="aws", download_dir=dest_dir)
nested_downloaded_files = [file.name for file in os.scandir('{}/{}/zarr'.format(dest_dir, bundle_fqid))]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you really have to list all files to assert that two of them were downloaded? What about os.path.isfile?

for file in ['exclamation.zattrs', 'nested.zattrs']:
self.assertIn(file, nested_downloaded_files)

def test_python_upload_download(self):
bundle_path = os.path.join(TEST_DIR, "res", "bundle")
uploaded_files = set(os.listdir(bundle_path))
uploaded_files = set(get_uploaded_file_names(bundle_path))

manifest = self.client.upload(src_dir=bundle_path,
replica="aws",
Expand Down Expand Up @@ -139,7 +173,7 @@ def test_python_upload_download(self):

def test_python_manifest_download(self):
bundle_path = os.path.join(TEST_DIR, "res", "bundle")
uploaded_files = set(os.listdir(bundle_path))
uploaded_files = set(get_uploaded_file_names(bundle_path))

manifest = self.client.upload(src_dir=bundle_path,
replica="aws",
Expand Down
4 changes: 4 additions & 0 deletions test/res/nested_bundle/zarr!exclamation.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"README": "The schema adopted in this zarr store may undergo changes in the future",
"sample_id": "0432e9a5-604f-4cb7-8571-014eb5fd8ba2"
}
4 changes: 4 additions & 0 deletions test/res/nested_bundle/zarr/nested.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"README": "The schema adopted in this zarr store may undergo changes in the future",
"sample_id": "0432e9a5-604f-4cb7-8571-014eb5fd8ba2"
}