-
Notifications
You must be signed in to change notification settings - Fork 156
Expand file tree
/
Copy path__init__.py
More file actions
335 lines (283 loc) · 10.8 KB
/
__init__.py
File metadata and controls
335 lines (283 loc) · 10.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
""" A Fast, Offline Reverse Geocoder in Python
A Python library for offline reverse geocoding. It improves on an existing library
called reverse_geocode developed by Richard Penman.
"""
from __future__ import print_function
__author__ = 'Ajay Thampi'
import os
import sys
import csv
if sys.platform == 'win32':
# Windows C long is 32 bits, and the Python int is too large to fit inside.
# Use the limit appropriate for a 32-bit integer as the max file size
csv.field_size_limit(2**31-1)
else:
csv.field_size_limit(sys.maxsize)
import zipfile
from scipy.spatial import cKDTree as KDTree
from reverse_geocoder import cKDTree_MP as KDTree_MP
import numpy as np
GN_URL = 'http://download.geonames.org/export/dump/'
GN_CITIES1000 = 'cities1000'
GN_ADMIN1 = 'admin1CodesASCII.txt'
GN_ADMIN2 = 'admin2Codes.txt'
# Schema of the GeoNames Cities with Population > 1000
GN_COLUMNS = {
'geoNameId': 0,
'name': 1,
'asciiName': 2,
'alternateNames': 3,
'latitude': 4,
'longitude': 5,
'featureClass': 6,
'featureCode': 7,
'countryCode': 8,
'cc2': 9,
'admin1Code': 10,
'admin2Code': 11,
'admin3Code': 12,
'admin4Code': 13,
'population': 14,
'elevation': 15,
'dem': 16,
'timezone': 17,
'modificationDate': 18
}
# Schema of the GeoNames Admin 1/2 Codes
ADMIN_COLUMNS = {
'concatCodes': 0,
'name': 1,
'asciiName': 2,
'geoNameId': 3
}
# Schema of the cities file created by this library
RG_COLUMNS = [
'lat',
'lon',
'name',
'admin1',
'admin2',
'cc'
]
# Name of cities file created by this library
RG_FILE = 'rg_cities1000.csv'
# WGS-84 major axis in kms
A = 6378.137
# WGS-84 eccentricity squared
E2 = 0.00669437999014
def singleton(cls):
"""
Function to get single instance of the RGeocoder class
"""
instances = {}
def getinstance(**kwargs):
"""
Creates a new RGeocoder instance if not created already
"""
if cls not in instances:
instances[cls] = cls(**kwargs)
return instances[cls]
return getinstance
class RGeocoderImpl(object):
"""
The main reverse geocoder class
"""
def __init__(self, mode=2, verbose=True, stream=None, stream_columns=None):
""" Class Instantiation
Args:`
mode (int): Library supports the following two modes:
- 1 = Single-threaded K-D Tree
- 2 = Multi-threaded K-D Tree (Default)
verbose (bool): For verbose output, set to True
stream (io.StringIO): An in-memory stream of a custom data source
"""
self.mode = mode
self.verbose = verbose
if stream:
coordinates, self.locations = self.load(stream, stream_columns)
else:
coordinates, self.locations = self.extract(rel_path(RG_FILE))
if mode == 1: # Single-process
self.tree = KDTree(coordinates)
else: # Multi-process
self.tree = KDTree_MP.cKDTree_MP(coordinates)
def query(self, coordinates):
"""
Function to query the K-D tree to find the nearest city
Args:
coordinates (list): List of tuple coordinates, i.e. [(latitude, longitude)]
"""
if self.mode == 1:
_, indices = self.tree.query(coordinates, k=1)
else:
_, indices = self.tree.pquery(coordinates, k=1)
return [self.locations[index] for index in indices]
def query_dist(self, coordinates):
"""
Function to query the K-D tree to find the nearest city
Args:
coordinates (list): List of tuple coordinates, i.e. [(latitude, longitude)]
"""
if self.mode == 1:
dists, indices = self.tree.query(coordinates, k=1)
else:
dists, indices = self.tree.pquery(coordinates, k=1)
# in pquery dists returns a list of arrays so get the first element instead of returning array
dists = [dist[0] for dist in dists]
return [(dists[n], self.locations[index]) for (n, index) in enumerate(indices)]
def load(self, stream, stream_columns):
"""
Function that loads a custom data source
Args:
stream (io.StringIO): An in-memory stream of a custom data source.
The format of the stream must be a comma-separated file
with header containing the columns defined in RG_COLUMNS.
"""
if not stream_columns:
stream_columns = RG_COLUMNS
stream_reader = csv.DictReader(stream, delimiter=',')
header = stream_reader.fieldnames
if header != stream_columns:
raise csv.Error('Input must be a comma-separated file with header containing ' + \
'the following columns - %s.\nFound header - %s.\nFor more help, visit: ' % (','.join(stream_columns), ','.join(header)) + \
'https://github.com/thampiman/reverse-geocoder')
# Load all the coordinates and locations
geo_coords, locations = [], []
for row in stream_reader:
geo_coords.append((row['lat'], row['lon']))
locations.append(row)
return geo_coords, locations
def extract(self, local_filename):
"""
Function loads the already extracted GeoNames cities file or downloads and extracts it if
it doesn't exist locally
Args:
local_filename (str): Path to local RG_FILE
"""
if os.path.exists(local_filename):
if self.verbose:
print('Loading formatted geocoded file...')
rows = csv.DictReader(open(local_filename, 'rt'))
else:
rows = self.do_extract(GN_CITIES1000, local_filename)
# Load all the coordinates and locations
geo_coords, locations = [], []
for row in rows:
geo_coords.append((row['lat'], row['lon']))
locations.append(row)
return geo_coords, locations
def do_extract(self, geoname_file, local_filename):
gn_cities_url = GN_URL + geoname_file + '.zip'
gn_admin1_url = GN_URL + GN_ADMIN1
gn_admin2_url = GN_URL + GN_ADMIN2
cities_zipfilename = geoname_file + '.zip'
cities_filename = geoname_file + '.txt'
if not os.path.exists(cities_zipfilename):
if self.verbose:
print('Downloading files from Geoname...')
try: # Python 3
import urllib.request
urllib.request.urlretrieve(gn_cities_url, cities_zipfilename)
urllib.request.urlretrieve(gn_admin1_url, GN_ADMIN1)
urllib.request.urlretrieve(gn_admin2_url, GN_ADMIN2)
except ImportError: # Python 2
import urllib
urllib.urlretrieve(gn_cities_url, cities_zipfilename)
urllib.urlretrieve(gn_admin1_url, GN_ADMIN1)
urllib.urlretrieve(gn_admin2_url, GN_ADMIN2)
if self.verbose:
print('Extracting %s...' % geoname_file)
_z = zipfile.ZipFile(open(cities_zipfilename, 'rb'))
open(cities_filename, 'wb').write(_z.read(cities_filename))
if self.verbose:
print('Loading admin1 codes...')
admin1_map = {}
t_rows = csv.reader(open(GN_ADMIN1, 'rt'), delimiter='\t')
for row in t_rows:
admin1_map[row[ADMIN_COLUMNS['concatCodes']]] = row[ADMIN_COLUMNS['asciiName']]
if self.verbose:
print('Loading admin2 codes...')
admin2_map = {}
for row in csv.reader(open(GN_ADMIN2, 'rt'), delimiter='\t'):
admin2_map[row[ADMIN_COLUMNS['concatCodes']]] = row[ADMIN_COLUMNS['asciiName']]
if self.verbose:
print('Creating formatted geocoded file...')
writer = csv.DictWriter(open(local_filename, 'wt'), fieldnames=RG_COLUMNS)
rows = []
for row in csv.reader(open(cities_filename, 'rt'), \
delimiter='\t', quoting=csv.QUOTE_NONE):
lat = row[GN_COLUMNS['latitude']]
lon = row[GN_COLUMNS['longitude']]
name = row[GN_COLUMNS['asciiName']]
cc = row[GN_COLUMNS['countryCode']]
admin1_c = row[GN_COLUMNS['admin1Code']]
admin2_c = row[GN_COLUMNS['admin2Code']]
cc_admin1 = cc+'.'+admin1_c
cc_admin2 = cc+'.'+admin1_c+'.'+admin2_c
admin1 = ''
admin2 = ''
if cc_admin1 in admin1_map:
admin1 = admin1_map[cc_admin1]
if cc_admin2 in admin2_map:
admin2 = admin2_map[cc_admin2]
write_row = {'lat':lat,
'lon':lon,
'name':name,
'admin1':admin1,
'admin2':admin2,
'cc':cc}
rows.append(write_row)
writer.writeheader()
writer.writerows(rows)
if self.verbose:
print('Removing extracted %s to save space...' % geoname_file)
os.remove(cities_filename)
return rows
@singleton
class RGeocoder(RGeocoderImpl):
pass
def geodetic_in_ecef(geo_coords):
geo_coords = np.asarray(geo_coords).astype(np.float)
lat = geo_coords[:, 0]
lon = geo_coords[:, 1]
lat_r = np.radians(lat)
lon_r = np.radians(lon)
normal = A / (np.sqrt(1 - E2 * (np.sin(lat_r) ** 2)))
x = normal * np.cos(lat_r) * np.cos(lon_r)
y = normal * np.cos(lat_r) * np.sin(lon_r)
z = normal * (1 - E2) * np.sin(lat)
return np.column_stack([x, y, z])
def rel_path(filename):
"""
Function that gets relative path to the filename
"""
return os.path.join(os.getcwd(), os.path.dirname(__file__), filename)
def get(geo_coord, mode=2, verbose=True):
"""
Function to query for a single coordinate
"""
if not isinstance(geo_coord, tuple) or not isinstance(geo_coord[0], float):
raise TypeError('Expecting a tuple')
_rg = RGeocoder(mode=mode, verbose=verbose)
return _rg.query([geo_coord])[0]
def search(geo_coords, mode=2, verbose=True):
"""
Function to query for a list of coordinates
"""
if not isinstance(geo_coords, tuple) and not isinstance(geo_coords, list):
raise TypeError('Expecting a tuple or a tuple/list of tuples')
elif not isinstance(geo_coords[0], tuple):
geo_coords = [geo_coords]
_rg = RGeocoder(mode=mode, verbose=verbose)
return _rg.query(geo_coords)
if __name__ == '__main__':
print('Testing single coordinate through get...')
city = (37.78674, -122.39222)
print('Reverse geocoding 1 city...')
result = get(city)
print(result)
print('Testing coordinates...')
cities = [(51.5214588, -0.1729636), (9.936033, 76.259952), (37.38605, -122.08385)]
print('Reverse geocoding %d cities...' % len(cities))
results = search(cities)
print(results)