import os
import subprocess
import sys
import time
import warnings

import psycopg

# Silence every warning for the duration of this batch run.
warnings.filterwarnings('ignore')

# load states: the file is split on whitespace, and the resulting tokens are
# used further down to decide which addresses are sent to the geocoder.
with open('/pgdata/states', 'r') as f:
    states = f.read().split()
print(states)

# The input CSV comes from the first command-line argument (it used to be
# hard-coded to /opt/palantir/sidecars/shared-volumes/shared/infile.csv).
if len(sys.argv) < 2:
    # Fail with a readable usage message instead of a bare IndexError.
    sys.exit("usage: " + sys.argv[0] + " <infile.csv>")
import_csv = sys.argv[1]
print("reading in " + import_csv)

# The result is written as outfile.csv in the same directory as the input
# (formerly /opt/palantir/sidecars/shared-volumes/shared/outfile.csv).
output_csv = os.path.join(os.path.dirname(os.path.abspath(import_csv)), "outfile.csv")
print("writing to " + output_csv)

## start postgres
# Keep the process handle so an immediate startup failure can be reported.
postgres_proc = subprocess.Popen("postgres")
# Fixed grace period for the server to come up before anything connects.
time.sleep(10)
if postgres_proc.poll() is not None:
    # Only warn: a server may already be running, in which case the script
    # can still proceed exactly as it did before this check existed.
    print("warning: postgres process exited early with code "
          + str(postgres_proc.returncode))

## import infile
address_df = pd.read_csv(import_csv)

## prepare for output
# Result columns are created up front, filled with None, so rows that are
# never geocoded still appear in the output with empty values.
output_columns = ['rating', 'stno', 'street',
                  'styp', 'city', 'st', 'zip', 'lat', 'lon']
address_df[output_columns] = [None] * len(output_columns)
address_df = address_df.reset_index(drop=True)

# setup the sql connection
# POSTGRES Connection goes here.
## destination is a Postgres database
dest_user = 'clad_svc'
dest_pw = 'not_on_gitlab'
3650
## run the loop
# NOTE(review): this section relies on `address_df`, `states` and an open
# database connection `cnxn` that are set up earlier in the script.

# Columns copied straight from the best geocoder match into address_df.
result_columns = ['rating', 'stno', 'street', 'styp', 'city', 'st', 'zip']

# The address is passed as a bound parameter rather than concatenated into
# the SQL text, so apostrophes in an address (e.g. "O'Brien St") neither
# break the statement nor allow SQL injection. The explicit ::varchar cast
# keeps the text overload of geocode() unambiguous for a bound parameter.
geocode_query = '''
SELECT
    g.rating
    , ST_AsText(ST_SnapToGrid(g.geomout,0.00001)) As wktlonlat
    , (addy).address As stno
    , (addy).streetname As street
    , (addy).streettypeabbrev As styp
    , (addy).location As city
    , (addy).stateabbrev As st
    , (addy).zip FROM geocode(%s::varchar) As g;'''

# Upper-case the state tokens once instead of once per address.
states_upper = [state.upper() for state in states]

for address_var, address in address_df['address'].items():
    # Missing or non-text addresses cannot be geocoded; leave the row empty.
    if not isinstance(address, str):
        continue

    # NOTE(review): this is a plain substring test, so a short token such as
    # "IN" also matches inside ordinary words -- confirm that is intended.
    address_upper = address.upper()
    if not any(state in address_upper for state in states_upper):
        continue

    try:
        print("processing " + address)
        all_results = pd.read_sql(geocode_query, cnxn, params=(address,))

        if all_results.empty:
            print("no match for " + address)
            continue

        # grab first result which will be one with best rating
        wkt_point = all_results['wktlonlat'].iloc[0]
        latlong = wkt_point.replace('POINT(', '').replace(')', '').split(" ")
        print(latlong)
        # WKT order is "lon lat"; unpack before writing so a malformed point
        # does not leave the row half filled.
        lon, lat = latlong[:2]

        # .at writes into address_df itself; chained assignment such as
        # address_df.rating[i] = ... may write to a temporary copy instead.
        for column in result_columns:
            address_df.at[address_var, column] = all_results[column].iloc[0]
        address_df.at[address_var, 'lat'] = lat
        address_df.at[address_var, 'lon'] = lon

    except Exception as exc:
        # Best effort: report the failure and move on to the next address.
        print("failed to geocode " + address + ": " + repr(exc))
        try:
            # A failed statement leaves an open transaction unusable until
            # it is rolled back; without this every later query would fail.
            cnxn.rollback()
        except Exception as rollback_exc:
            print("rollback failed: " + repr(rollback_exc))

## write output
address_df.to_csv(output_csv)
0 commit comments