From f98fe6096463d689742e351def70035018106476 Mon Sep 17 00:00:00 2001
From: aaronfriedman
Date: Tue, 10 Mar 2026 14:21:37 -0400
Subject: [PATCH 1/2] Allow uploading arbitrary files to s3

---
Review note: three attribute references in the added lines of s3_client.py
were corrected. S3Client.__init__ only sets `self.bucket`, so the
`self.s3_bucket` references in the set_cache and upload_file error paths
raised AttributeError instead of S3ClientError; the upload_file log line
used `self.s3_client.name`, which is not an attribute of a boto3 client.
All three now use `self.bucket`. The changes are line-count neutral, so
hunk headers and the diffstat are unchanged; the blob ids on the "index"
lines were not recomputed. Line breaks and indentation were reconstructed
from the hunk line counts.

 CHANGELOG.md                              |  3 ++
 pyproject.toml                            | 26 ++++++---
 src/nypl_py_utils/classes/s3_client.py    | 65 +++++++++++++++--------
 src/nypl_py_utils/functions/log_helper.py |  3 +-
 tests/test_s3_client.py                   |  7 +++
 5 files changed, 72 insertions(+), 32 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6d3e789..bfad344 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,7 @@
 # Changelog
+## v1.9.0 3/10/26
+- Add capability to upload arbitrary file type to S3
+
 ## v1.8.0 8/19/25
 - Add optional JSON structured logging
 
diff --git a/pyproject.toml b/pyproject.toml
index 59df17f..c0a20e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "nypl_py_utils"
-version = "1.8.0"
+version = "1.9.0"
 authors = [
   { name="Aaron Friedman", email="aaronfriedman@nypl.org" },
 ]
@@ -24,64 +24,76 @@ dependencies = []
 
 [project.optional-dependencies]
 avro-client = [
+    "nypl_py_utils[log-helper]",
     "fastavro>=1.11.1",
     "requests>=2.28.1"
 ]
 cloudlibrary-client = [
+    "nypl_py_utils[log-helper]",
     "requests>=2.28.1"
 ]
 kinesis-client = [
+    "nypl_py_utils[log-helper]",
     "boto3>=1.26.5",
     "botocore>=1.29.5"
 ]
 kms-client = [
+    "nypl_py_utils[log-helper]",
     "boto3>=1.26.5",
     "botocore>=1.29.5"
 ]
-log_helper = [
-    "structlog>=25.4.0"
-]
 mysql-client = [
+    "nypl_py_utils[log-helper]",
     "mysql-connector-python>=8.0.32"
 ]
 oauth2-api-client = [
+    "nypl_py_utils[log-helper]",
     "oauthlib>=3.2.2",
     "requests_oauthlib>=1.3.1"
 ]
 postgresql-client = [
+    "nypl_py_utils[log-helper]",
     "psycopg[binary]>=3.1.6"
 ]
 redshift-client = [
+    "nypl_py_utils[log-helper]",
     "botocore>=1.29.5",
     "redshift-connector>=2.0.909"
 ]
 s3-client = [
+    "nypl_py_utils[log-helper]",
     "boto3>=1.26.5",
     "botocore>=1.29.5"
 ]
 secrets-manager-client = [
+    "nypl_py_utils[log-helper]",
     "boto3>=1.26.5",
     "botocore>=1.29.5"
 ]
 sftp-client = [
+    "nypl_py_utils[log-helper]",
     "paramiko>=3.4.1"
 ]
 config-helper = [
-    "nypl_py_utils[kms-client]",
+    "nypl_py_utils[kms-client,log-helper]",
     "PyYAML>=6.0"
 ]
+log-helper = [
+    "structlog>=25.5.0"
+]
 obfuscation-helper = [
+    "nypl_py_utils[log-helper]",
     "bcrypt>=4.0.1"
 ]
 patron-data-helper = [
-    "nypl_py_utils[postgresql-client,redshift-client]>=1.1.5",
+    "nypl_py_utils[postgresql-client,redshift-client,log-helper]>=1.1.5",
     "pandas>=2.2.2"
 ]
 research-catalog-identifier-helper = [
     "requests>=2.28.1"
 ]
 development = [
-    "nypl_py_utils[avro-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,redshift-client,s3-client,secrets-manager-client,sftp-client,config-helper,obfuscation-helper,patron-data-helper,research-catalog-identifier-helper,log_helper]",
+    "nypl_py_utils[avro-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,redshift-client,s3-client,secrets-manager-client,sftp-client,config-helper,log-helper,obfuscation-helper,patron-data-helper,research-catalog-identifier-helper]",
     "flake8>=6.0.0",
     "freezegun>=1.2.2",
     "mock>=4.0.3",
diff --git a/src/nypl_py_utils/classes/s3_client.py b/src/nypl_py_utils/classes/s3_client.py
index af71531..b0c758c 100644
--- a/src/nypl_py_utils/classes/s3_client.py
+++ b/src/nypl_py_utils/classes/s3_client.py
@@ -14,7 +14,7 @@ class S3Client:
     Takes as input the name of the S3 bucket and resource to be fetched/set.
     """
 
-    def __init__(self, bucket, resource):
+    def __init__(self, bucket, resource=None):
         self.logger = create_log('s3_client')
         self.bucket = bucket
         self.resource = resource
@@ -23,49 +23,68 @@ def __init__(self, bucket, resource):
             self.s3_client = boto3.client(
                 's3', region_name=os.environ.get('AWS_REGION', 'us-east-1'))
         except ClientError as e:
-            self.logger.error(
-                'Could not create S3 client: {err}'.format(err=e))
-            raise S3ClientError(
-                'Could not create S3 client: {err}'.format(err=e)) from None
+            error_msg = f'Could not create S3 client: {e}'
+            self.logger.error(error_msg)
+            raise S3ClientError(error_msg) from None
 
     def close(self):
         self.s3_client.close()
 
     def fetch_cache(self):
         """Fetches a JSON file from S3 and returns the resulting dictionary"""
-        self.logger.info('Fetching {file} from S3 bucket {bucket}'.format(
-            file=self.resource, bucket=self.bucket))
+        self.logger.info(
+            f'Fetching {self.resource} from S3 bucket {self.bucket}')
         try:
             output_stream = BytesIO()
             self.s3_client.download_fileobj(
                 self.bucket, self.resource, output_stream)
             return json.loads(output_stream.getvalue())
         except ClientError as e:
-            self.logger.error(
-                'Error retrieving {file} from S3 bucket {bucket}: {error}'
-                .format(file=self.resource, bucket=self.bucket, error=e))
-            raise S3ClientError(
-                'Error retrieving {file} from S3 bucket {bucket}: {error}'
-                .format(file=self.resource, bucket=self.bucket, error=e)
-            ) from None
+            error_msg = (
+                f'Error retrieving {self.resource} from S3 bucket '
+                f'{self.bucket}: {e}')
+            self.logger.error(error_msg)
+            raise S3ClientError(error_msg) from None
 
     def set_cache(self, state):
         """Writes a dictionary to JSON and uploads the resulting file to S3"""
         self.logger.info(
-            'Setting {file} in S3 bucket {bucket} to {state}'.format(
-                file=self.resource, bucket=self.bucket, state=state))
+            f'Setting {self.resource} in S3 bucket {self.bucket} to {state}')
         try:
             input_stream = BytesIO(json.dumps(state).encode())
             self.s3_client.upload_fileobj(
                 input_stream, self.bucket, self.resource)
         except ClientError as e:
-            self.logger.error(
-                'Error uploading {file} to S3 bucket {bucket}: {error}'
-                .format(file=self.resource, bucket=self.bucket, error=e))
-            raise S3ClientError(
-                'Error uploading {file} to S3 bucket {bucket}: {error}'
-                .format(file=self.resource, bucket=self.bucket, error=e)
-            ) from None
+            error_msg = (
+                f'Error uploading {self.resource} to S3 bucket '
+                f'{self.bucket}: {e}')
+            self.logger.error(error_msg)
+            raise S3ClientError(error_msg) from None
+
+    def upload_file(self, content, file_path):
+        """
+        Writes an arbitrary file to S3. Note that this will overwrite any
+        existing file with the same name.
+
+        Parameters
+        ----------
+        content: str
+            The string that should be written to the file. Must be utf-8.
+        file_path: str
+            The full path of the file that should be written not including the
+            bucket. Example: "subdirectory/example_file.csv"
+        """
+        self.logger.info(
+            f'Writing {file_path} in S3 bucket {self.bucket}')
+        try:
+            input_stream = BytesIO(content.encode())
+            self.s3_client.upload_fileobj(input_stream, self.bucket, file_path)
+        except ClientError as e:
+            error_msg = (
+                f'Error uploading {file_path} to S3 bucket '
+                f'{self.bucket}: {e}')
+            self.logger.error(error_msg)
+            raise S3ClientError(error_msg) from None
 
 
 class S3ClientError(Exception):
diff --git a/src/nypl_py_utils/functions/log_helper.py b/src/nypl_py_utils/functions/log_helper.py
index f196191..1370c64 100644
--- a/src/nypl_py_utils/functions/log_helper.py
+++ b/src/nypl_py_utils/functions/log_helper.py
@@ -1,7 +1,6 @@
-import structlog
-
 import logging
 import os
+import structlog
 import sys
 
 levels = {
diff --git a/tests/test_s3_client.py b/tests/test_s3_client.py
index bbb74e0..cdc8293 100644
--- a/tests/test_s3_client.py
+++ b/tests/test_s3_client.py
@@ -28,3 +28,10 @@ def test_set_cache(self, test_instance):
         assert arguments[0].getvalue() == json.dumps(_TEST_STATE).encode()
         assert arguments[1] == 'test_s3_bucket'
         assert arguments[2] == 'test_s3_resource'
+
+    def test_upload_file(self, test_instance):
+        test_instance.upload_file('test_content', 'test_filename.txt')
+        arguments = test_instance.s3_client.upload_fileobj.call_args.args
+        assert arguments[0].getvalue() == b'test_content'
+        assert arguments[1] == 'test_s3_bucket'
+        assert arguments[2] == 'test_filename.txt'

From 82ab5ddb192542a41a3069d08201b21784b1d175 Mon Sep 17 00:00:00 2001
From: aaronfriedman
Date: Tue, 10 Mar 2026 14:23:42 -0400
Subject: [PATCH 2/2] Update client doc

---
 src/nypl_py_utils/classes/s3_client.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/nypl_py_utils/classes/s3_client.py b/src/nypl_py_utils/classes/s3_client.py
index b0c758c..f1a422d 100644
--- a/src/nypl_py_utils/classes/s3_client.py
+++ b/src/nypl_py_utils/classes/s3_client.py
@@ -11,7 +11,8 @@ class S3Client:
     """
     Client for fetching and setting an AWS S3 file.
 
-    Takes as input the name of the S3 bucket and resource to be fetched/set.
+    Takes as input the name of the S3 bucket. If fetching/setting a cache, also
+    takes the cached resource.
     """
 
     def __init__(self, bucket, resource=None):