Skip to content

Commit 052f152

Browse files
committed
initial commit
1 parent eb9b633 commit 052f152

File tree

5 files changed

+166
-1
lines changed

5 files changed

+166
-1
lines changed

.github/workflows/test.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# A github actions workflow for testing the code
2+
name: Test
3+
4+
on:
5+
push:
6+
7+
env:
8+
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
9+
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
10+
AWS_DEFAULT_REGION: us-east-1
11+
DB_HOST: ${{ secrets.DB_HOST }}
12+
13+
jobs:
14+
test:
15+
runs-on: ubuntu-latest
16+
steps:
17+
- uses: actions/checkout@v3
18+
- uses: actions/setup-python@v3
19+
with:
20+
python-version: 3.11
21+
- name: Install dependencies
22+
run: |
23+
python -m pip install --upgrade pip
24+
pip install -r requirements.txt
25+
26+
- name: Run code
27+
run: python main.py

README.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,26 @@
1-
# python-coding-interview-ext
1+
# Python Coding Interview
2+
3+
## Instructions
4+
5+
> We have a business need to build a scheduled task which will flag anomalous data in a database given to us by a third party.
6+
>
7+
>
8+
> The main requirements given to our team are:
9+
>
10+
> - We need to connect to a database and retrieve data
11+
> - We need to identify duplicates in the database and flag them as such
12+
> - We need to identify outlier datapoints in the database and flag them as such
13+
> - We do not control the data source, and we have read-only access to the database.
14+
>
15+
> You should create a new branch and iterate on the code in this repository to meet the requirements above. Please leave comments in the code to explain your thought process and any changes you make.
16+
> Push commits to your branch in order to trigger the github actions workflow. You can view the results of the workflow in the [actions tab of the repository](https://github.com/trmlabs/python-coding-interview-ext/actions).
17+
>
18+
19+
### Advanced Instructions
20+
21+
> If you have time, please also consider the following:
22+
>
23+
> - How would you test this code?
24+
> - How can the database connection handling be improved?
25+
> - How would you handle a large amount of data?
26+
>

main.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import os
2+
import json
3+
import boto3
4+
5+
from sqlalchemy import create_engine
6+
from sqlalchemy.orm import sessionmaker
7+
from sqlalchemy.sql import text
8+
9+
from botocore.exceptions import ClientError
10+
11+
12+
class Config:
    """Resolve database settings from AWS Secrets Manager, env vars, or defaults.

    Lookup order for each setting is: the named AWS secret (when it is a JSON
    object containing the key), then the process environment, then the
    supplied default.
    """

    def __init__(self):
        # No region or credentials are passed explicitly to the client here.
        self.secrets_manager = boto3.client('secretsmanager')

    def get_secret_from_aws(self, secret_name):
        """Fetch ``secret_name`` and return its JSON-decoded ``SecretString``.

        Returns:
            The decoded JSON value, or ``None`` when the response carries no
            ``SecretString`` or the request fails with a ``ClientError``.

        Raises:
            json.JSONDecodeError: if the secret string is not valid JSON.
        """
        try:
            response = self.secrets_manager.get_secret_value(SecretId=secret_name)
        except ClientError:
            # Best-effort lookup: the caller falls back to env vars/defaults.
            return None
        if 'SecretString' not in response:
            return None
        return json.loads(response['SecretString'])

    def get_config_value(self, key, default_value, secret_name=None):
        """Return the value for ``key`` from the secret, environment, or default.

        Args:
            key: Name looked up inside the secret and in ``os.environ``.
            default_value: Returned when neither source provides ``key``.
            secret_name: Optional Secrets Manager secret to consult first.
        """
        if secret_name:
            aws_secret = self.get_secret_from_aws(secret_name)
            # Only JSON objects can hold the key; a bare string or number
            # payload must not be probed with ``in`` or indexed.
            if isinstance(aws_secret, dict) and key in aws_secret:
                # Return the single value, not the whole secret payload.
                return aws_secret[key]
        return os.environ.get(key, default_value)

    def get_db_config(self):
        """Return the database settings as a dict of name/user/password/host/port."""
        return {
            "name": self.get_config_value("DB_NAME", "postgres", "python_interview_dbname"),
            "user": self.get_config_value("DB_USER", "postgres", "python_interview_user"),
            "password": self.get_config_value("DB_PASSWORD", "password", "python_interview_password"),
            "host": self.get_config_value("DB_HOST", "localhost", "python_interview_host"),
            "port": self.get_config_value("DB_PORT", "5432", "python_interview_port"),
        }

    def get_conn_string(self):
        """Build the ``postgresql://`` connection URL from the resolved settings."""
        from urllib.parse import quote

        db_config = self.get_db_config()
        # Percent-encode the credentials so reserved characters such as
        # '@', ':' or '/' cannot be mistaken for URL delimiters.
        return "postgresql://{user}:{password}@{host}:{port}/{name}".format(
            name=db_config['name'],
            user=quote(str(db_config['user']), safe=''),
            password=quote(str(db_config['password']), safe=''),
            host=db_config['host'],
            port=db_config['port'],
        )
52+
53+
54+
class DatabaseConnection:
    """Hold a SQLAlchemy engine together with a session factory bound to it."""

    def __init__(self, conn_string):
        """Create the engine for ``conn_string`` and bind a session factory."""
        engine = create_engine(conn_string)
        self.engine = engine
        # Despite its name, ``session`` stores the sessionmaker factory,
        # not an open session; calling it produces a new session.
        self.session = sessionmaker(bind=engine)

    def get_session(self):
        """Return a new session produced by the stored factory."""
        return self.session()
62+
63+
64+
def dedupe_data(session, table_name):
    """Load every row of ``table_name`` and print each one.

    The anomaly detection itself is still a placeholder: the ``duplicates``
    and ``outliers`` lists are created but never filled or returned.

    Args:
        session: Object whose ``execute`` method runs the query.
        table_name: Table to read. It is interpolated directly into the SQL
            text, so it must come from a trusted source.
    """
    query = text(f"SELECT * FROM {table_name}")
    rows = session.execute(query).fetchall()

    duplicates, outliers = [], []
    for row in rows:
        print(row)
        # Insert your anomaly detection logic here
72+
73+
74+
if __name__ == "__main__":
    # Script entry point: resolve settings, connect, and scan the table.
    config = Config()
    conn_string = config.get_conn_string()
    db = DatabaseConnection(conn_string)

    try:
        # Using the session as a context manager closes it on exit, even
        # when the scan raises, so the connection goes back to the pool.
        with db.get_session() as session:
            dedupe_data(session, "crypto_transactions")
    finally:
        # Release the engine's pooled connections before the process exits.
        db.engine.dispose()

requirements.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
boto3
2+
sqlalchemy
3+
psycopg2-binary

requirements.txt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#
2+
# This file is autogenerated by pip-compile with Python 3.11
3+
# by the following command:
4+
#
5+
# pip-compile requirements.in
6+
#
7+
boto3==1.28.43
8+
# via -r requirements.in
9+
botocore==1.31.43
10+
# via
11+
# boto3
12+
# s3transfer
13+
jmespath==1.0.1
14+
# via
15+
# boto3
16+
# botocore
17+
psycopg2-binary==2.9.7
18+
# via -r requirements.in
19+
python-dateutil==2.8.2
20+
# via botocore
21+
s3transfer==0.6.2
22+
# via boto3
23+
six==1.16.0
24+
# via python-dateutil
25+
sqlalchemy==2.0.20
26+
# via -r requirements.in
27+
typing-extensions==4.7.1
28+
# via sqlalchemy
29+
urllib3==1.26.16
30+
# via botocore

0 commit comments

Comments
 (0)