Skip to content

Commit b2c19d5

Browse files
authored
fix postgis to return results, filter by states from build process
1 parent a4414ab commit b2c19d5

2 files changed

Lines changed: 52 additions & 37 deletions

File tree

uwpostgis/Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,8 @@ RUN pip install --break-system-packages --no-cache-dir -r /requirements.txt
6565
# copy database from db_creator
6666
COPY --chmod=700 --from=db_creator /pgdata /pgdata
6767
RUN chown -R 5001:5001 /tmp /var/run/postgresql /pgdata
68+
ARG state_var
69+
RUN echo "$state_var" > /pgdata/states
6870

6971
WORKDIR /opt/palantir/sidecars/shared-volumes/shared/
7072
USER 5001

uwpostgis/process_csv.py

Lines changed: 50 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -4,25 +4,39 @@
44
import os
55
import warnings
66
import psycopg
7+
import subprocess
8+
import sys
9+
import time
710
warnings.filterwarnings('ignore')
811

9-
# tested 10/4/24 - JTL
12+
# load states
13+
with open('/pgdata/states', 'r') as f:
14+
states = f.read().split()
15+
print(states)
1016

11-
import_csv = '/opt/palantir/sidecars/shared-volumes/shared/infile.csv'
12-
output_csv = '/opt/palantir/sidecars/shared-volumes/shared/outfile.csv'
17+
# import_csv = '/opt/palantir/sidecars/shared-volumes/shared/infile.csv'
18+
import_csv = sys.argv[1]
19+
print("reading in " + import_csv)
20+
# output_csv = '/opt/palantir/sidecars/shared-volumes/shared/outfile.csv'
21+
output_csv = os.path.dirname(os.path.abspath(import_csv)) + "/outfile.csv"
22+
print("writing to " + output_csv)
23+
24+
## start postgres
25+
subprocess.Popen("postgres")
26+
time.sleep(10)
1327

1428
## import infile
1529
address_df = pd.read_csv(import_csv)
1630

1731
## prepare for output
1832
address_df[['rating','stno','street',
19-
'styp','city','st','zip','lat','lon']] = [None, None, None, None,
20-
None,None, None,
33+
'styp','city','st','zip','lat','lon']] = [None, None, None, None,
34+
None, None, None,
2135
None, None]
2236
address_df = address_df.reset_index(drop=True)
2337

24-
#setup the sql connection
25-
#POSTGRES Connection goes here.
38+
# setup the sql connection
39+
# POSTGRES Connection goes here.
2640
## destination is a Postgres database
2741
dest_user = 'clad_svc'
2842
dest_pw = 'not_on_gitlab'
@@ -36,40 +50,39 @@
3650

3751
## run the loop
3852
for address_var in range(address_df.shape[0]):
53+
if any(state.upper() in str(address_df.address[address_var]).upper() for state in states):
54+
try:
55+
print("processing " + str(address_df.address[address_var]))
56+
string_to_geocode = '''
57+
SELECT
58+
g.rating
59+
, ST_AsText(ST_SnapToGrid(g.geomout,0.00001)) As wktlonlat
60+
, (addy).address As stno
61+
, (addy).streetname As street
62+
, (addy).streettypeabbrev As styp
63+
, (addy).location As city
64+
, (addy).stateabbrev As st
65+
, (addy).zip FROM geocode(''' + "'" + address_df.address[address_var] + "'" + ''') As g;'''
3966

40-
try:
41-
42-
string_to_geocode = '''
43-
SELECT
44-
g.rating
45-
, ST_AsText(ST_SnapToGrid(g.geomout,0.00001)) As wktlonlat
46-
, (addy).address As stno
47-
, (addy).streetname As street
48-
, (addy).streettypeabbrev As styp
49-
, (addy).location As city
50-
, (addy).stateabbrev As st
51-
, (addy).zip FROM geocode(''' + "'" + address_df.address[address_var] + "'" + ''') As g;'''
52-
53-
all_results = pd.read_sql(string_to_geocode, cnxn)
67+
all_results = pd.read_sql(string_to_geocode, cnxn)
5468

55-
# grab first result which will be one with best rating
56-
temp_results = all_results.head(1)
57-
latlong = temp_results['wktlonlat'][0].replace('POINT(','').replace(')','',).split(" ")
58-
print(latlong)
59-
60-
address_df.rating[address_var] = temp_results.rating[0]
61-
address_df.stno[address_var] = temp_results.stno[0]
62-
address_df.street[address_var] = temp_results.street[0]
63-
address_df.styp[address_var] = temp_results.styp[0]
64-
address_df.city[address_var] = temp_results.city[0]
65-
address_df.st[address_var] = temp_results.st[0]
66-
address_df.zip[address_var] = temp_results.zip[0]
67-
address_df.lat[address_var] = latlong[1]
68-
address_df.lon[address_var] = latlong[0]
69+
# grab first result which will be one with best rating
70+
temp_results = all_results.head(1)
71+
latlong = temp_results['wktlonlat'][0].replace('POINT(','').replace(')','',).split(" ")
72+
print(latlong)
6973

70-
except:
71-
pass
74+
address_df.rating[address_var] = temp_results.rating[0]
75+
address_df.stno[address_var] = temp_results.stno[0]
76+
address_df.street[address_var] = temp_results.street[0]
77+
address_df.styp[address_var] = temp_results.styp[0]
78+
address_df.city[address_var] = temp_results.city[0]
79+
address_df.st[address_var] = temp_results.st[0]
80+
address_df.zip[address_var] = temp_results.zip[0]
81+
address_df.lat[address_var] = latlong[1]
82+
address_df.lon[address_var] = latlong[0]
7283

84+
except:
85+
pass
7386

7487
## write output
7588
address_df.to_csv(output_csv)

0 commit comments

Comments
 (0)