Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# Changelog
## v1.9.0 3/10/26
- Add capability to upload arbitrary file types to S3

## v1.8.0 8/19/25
- Add optional JSON structured logging

Expand Down
26 changes: 19 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "nypl_py_utils"
version = "1.8.0"
version = "1.9.0"
authors = [
{ name="Aaron Friedman", email="aaronfriedman@nypl.org" },
]
Expand All @@ -24,64 +24,76 @@ dependencies = []

[project.optional-dependencies]
avro-client = [
"nypl_py_utils[log-helper]",
"fastavro>=1.11.1",
"requests>=2.28.1"
]
cloudlibrary-client = [
"nypl_py_utils[log-helper]",
"requests>=2.28.1"
]
kinesis-client = [
"nypl_py_utils[log-helper]",
"boto3>=1.26.5",
"botocore>=1.29.5"
]
kms-client = [
"nypl_py_utils[log-helper]",
"boto3>=1.26.5",
"botocore>=1.29.5"
]
log_helper = [
"structlog>=25.4.0"
]
mysql-client = [
"nypl_py_utils[log-helper]",
"mysql-connector-python>=8.0.32"
]
oauth2-api-client = [
"nypl_py_utils[log-helper]",
"oauthlib>=3.2.2",
"requests_oauthlib>=1.3.1"
]
postgresql-client = [
"nypl_py_utils[log-helper]",
"psycopg[binary]>=3.1.6"
]
redshift-client = [
"nypl_py_utils[log-helper]",
"botocore>=1.29.5",
"redshift-connector>=2.0.909"
]
s3-client = [
"nypl_py_utils[log-helper]",
"boto3>=1.26.5",
"botocore>=1.29.5"
]
secrets-manager-client = [
"nypl_py_utils[log-helper]",
"boto3>=1.26.5",
"botocore>=1.29.5"
]
sftp-client = [
"nypl_py_utils[log-helper]",
"paramiko>=3.4.1"
]
config-helper = [
"nypl_py_utils[kms-client]",
"nypl_py_utils[kms-client,log-helper]",
"PyYAML>=6.0"
]
log-helper = [
"structlog>=25.5.0"
]
obfuscation-helper = [
"nypl_py_utils[log-helper]",
"bcrypt>=4.0.1"
]
patron-data-helper = [
"nypl_py_utils[postgresql-client,redshift-client]>=1.1.5",
"nypl_py_utils[postgresql-client,redshift-client,log-helper]>=1.1.5",
"pandas>=2.2.2"
]
research-catalog-identifier-helper = [
"requests>=2.28.1"
]
development = [
"nypl_py_utils[avro-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,redshift-client,s3-client,secrets-manager-client,sftp-client,config-helper,obfuscation-helper,patron-data-helper,research-catalog-identifier-helper,log_helper]",
"nypl_py_utils[avro-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,redshift-client,s3-client,secrets-manager-client,sftp-client,config-helper,log-helper,obfuscation-helper,patron-data-helper,research-catalog-identifier-helper]",
"flake8>=6.0.0",
"freezegun>=1.2.2",
"mock>=4.0.3",
Expand Down
68 changes: 44 additions & 24 deletions src/nypl_py_utils/classes/s3_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ class S3Client:
"""
Client for fetching and setting an AWS S3 file.

Takes as input the name of the S3 bucket and resource to be fetched/set.
Takes as input the name of the S3 bucket. If fetching/setting a cache, also
takes the cached resource.
"""

def __init__(self, bucket, resource):
def __init__(self, bucket, resource=None):
self.logger = create_log('s3_client')
self.bucket = bucket
self.resource = resource
Expand All @@ -23,49 +24,68 @@ def __init__(self, bucket, resource):
self.s3_client = boto3.client(
's3', region_name=os.environ.get('AWS_REGION', 'us-east-1'))
except ClientError as e:
self.logger.error(
'Could not create S3 client: {err}'.format(err=e))
raise S3ClientError(
'Could not create S3 client: {err}'.format(err=e)) from None
error_msg = f'Could not create S3 client: {e}'
self.logger.error(error_msg)
raise S3ClientError(error_msg) from None

def close(self):
    """Closes the underlying boto3 S3 client"""
    client = self.s3_client
    client.close()

def fetch_cache(self):
    """
    Fetches a JSON file from S3 and returns the resulting dictionary.

    The file that is read is `self.resource` in `self.bucket`.

    Returns
    -------
    The result of parsing the downloaded file's contents as JSON

    Raises
    ------
    S3ClientError
        If the client has no resource set or if a ClientError occurs
        while downloading the file
    """
    if self.resource is None:
        # The resource is optional when the client is constructed, so
        # fail with a clear error rather than handing boto3 a null key
        error_msg = (
            f'Cannot fetch cache from S3 bucket {self.bucket}: no '
            'resource was set')
        self.logger.error(error_msg)
        raise S3ClientError(error_msg)

    self.logger.info(
        f'Fetching {self.resource} from S3 bucket {self.bucket}')
    try:
        # Download into memory rather than onto disk
        output_stream = BytesIO()
        self.s3_client.download_fileobj(
            self.bucket, self.resource, output_stream)
        return json.loads(output_stream.getvalue())
    except ClientError as e:
        error_msg = (
            f'Error retrieving {self.resource} from S3 bucket '
            f'{self.bucket}: {e}')
        self.logger.error(error_msg)
        raise S3ClientError(error_msg) from None

def set_cache(self, state):
    """
    Writes a dictionary to JSON and uploads the resulting file to S3.

    The file that is written is `self.resource` in `self.bucket`.

    Parameters
    ----------
    state: dict
        The JSON-serializable value that should be stored in the file

    Raises
    ------
    S3ClientError
        If the client has no resource set or if a ClientError occurs
        while uploading the file
    """
    if self.resource is None:
        # The resource is optional when the client is constructed, so
        # fail with a clear error rather than handing boto3 a null key
        error_msg = (
            f'Cannot set cache in S3 bucket {self.bucket}: no '
            'resource was set')
        self.logger.error(error_msg)
        raise S3ClientError(error_msg)

    self.logger.info(
        f'Setting {self.resource} in S3 bucket {self.bucket} to {state}')
    try:
        input_stream = BytesIO(json.dumps(state).encode())
        self.s3_client.upload_fileobj(
            input_stream, self.bucket, self.resource)
    except ClientError as e:
        # The bucket name is stored on self.bucket
        error_msg = (
            f'Error uploading {self.resource} to S3 bucket '
            f'{self.bucket}: {e}')
        self.logger.error(error_msg)
        raise S3ClientError(error_msg) from None

def upload_file(self, content, file_path):
    """
    Writes an arbitrary file to S3. Note that this will overwrite any
    existing file with the same name.

    Parameters
    ----------
    content: str
        The string that should be written to the file. Must be utf-8.
    file_path: str
        The full path of the file that should be written not including the
        bucket. Example: "subdirectory/example_file.csv"

    Raises
    ------
    S3ClientError
        If a ClientError occurs while uploading the file
    """
    # The bucket name is stored on self.bucket
    self.logger.info(
        f'Writing {file_path} in S3 bucket {self.bucket}')
    try:
        input_stream = BytesIO(content.encode())
        self.s3_client.upload_fileobj(input_stream, self.bucket, file_path)
    except ClientError as e:
        error_msg = (
            f'Error uploading {file_path} to S3 bucket '
            f'{self.bucket}: {e}')
        self.logger.error(error_msg)
        raise S3ClientError(error_msg) from None


class S3ClientError(Exception):
Expand Down
3 changes: 1 addition & 2 deletions src/nypl_py_utils/functions/log_helper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import structlog

import logging
import os
import structlog
import sys

levels = {
Expand Down
7 changes: 7 additions & 0 deletions tests/test_s3_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,10 @@ def test_set_cache(self, test_instance):
assert arguments[0].getvalue() == json.dumps(_TEST_STATE).encode()
assert arguments[1] == 'test_s3_bucket'
assert arguments[2] == 'test_s3_resource'

def test_upload_file(self, test_instance):
    """upload_file should pass the encoded content, bucket, and path on"""
    test_instance.upload_file('test_content', 'test_filename.txt')
    upload_call = test_instance.s3_client.upload_fileobj.call_args
    positional = upload_call.args
    # upload_fileobj is called as (stream, bucket, key)
    assert positional[0].getvalue() == b'test_content'
    assert positional[1] == 'test_s3_bucket'
    assert positional[2] == 'test_filename.txt'