Skip to content

Commit fd0506c

Browse files
committed
fix postal code downloader/parser
1 parent 6d0a928 commit fd0506c

8 files changed

Lines changed: 157 additions & 36 deletions

File tree

lib/location.ex

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
NimbleCSV.define(PostCodeCSV, separator: ",", escape: "\~")
12
NimbleCSV.define(LocationCSV, separator: "\t", escape: "\~")
23

34
defmodule Location do

lib/location/postalcode.ex

Lines changed: 142 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -23,40 +23,26 @@ defmodule Location.PostalCode do
2323
{:decentralized_counters, false}
2424
])
2525

26+
@ets_table_by_id =
27+
:ets.new(@ets_table_by_id, [
28+
:set,
29+
:named_table,
30+
:public,
31+
:compressed,
32+
{:write_concurrency, true},
33+
{:read_concurrency, true},
34+
{:decentralized_counters, false}
35+
])
36+
2637
source_file()
2738
|> File.stream!()
2839
|> Stream.chunk_every(15_000)
2940
|> Task.async_stream(
3041
fn chunk ->
3142
chunk
32-
|> LocationCSV.parse_stream()
33-
|> Stream.each(fn [
34-
country_code,
35-
postal_code,
36-
city_name,
37-
_state_name,
38-
state_code,
39-
_municipality,
40-
_municipality_code,
41-
_admin_name3,
42-
_admin_code3,
43-
latitude,
44-
longitude,
45-
_accuracy
46-
] ->
47-
country_code = String.trim(country_code)
48-
49-
true =
50-
:ets.insert(
51-
@ets_table_by_lookup,
52-
{{country_code, state_code, city_name}, {postal_code, latitude, longitude}}
53-
)
54-
55-
true =
56-
:ets.insert(
57-
@ets_table_by_id,
58-
{postal_code, {country_code, state_code, city_name, latitude, longitude}}
59-
)
43+
|> PostCodeCSV.parse_stream()
44+
|> Stream.each(fn data ->
45+
__MODULE__.parse(data)
6046
end)
6147
|> Stream.run()
6248
end,
@@ -65,6 +51,125 @@ defmodule Location.PostalCode do
6551
|> Stream.run()
6652
end
6753

54+
@doc """
Stores one parsed postal-code CSV row in the ETS tables.

`data` is expected to be a list of column values in this order:
country code, postal code, city name, state name, state code, municipality,
municipality code, admin name 3, admin code 3, latitude, longitude, followed
by zero to three trailing columns (accuracy and any extras), which are ignored.

For a matching row, the country code is trimmed and two entries are written:

  * `{{country_code, state_code, city_name}, {postal_code, latitude, longitude}}`
    into the lookup table
  * `{postal_code, {country_code, state_code, city_name, latitude, longitude}}`
    into the by-id table

Returns `true` when the row was inserted. Any other input (fewer than 11 or
more than 14 columns, or not a list) is skipped and `:ok` is returned.
"""
@spec parse(term()) :: true | :ok
def parse([
      country_code,
      postal_code,
      city_name,
      _state_name,
      state_code,
      _municipality,
      _municipality_code,
      _admin_name3,
      _admin_code3,
      latitude,
      longitude
      | extra
    ])
    when length(extra) <= 3 do
  # Trim for every accepted row width so the keys in both tables are built
  # the same way regardless of how many trailing columns the row carries.
  country_code = String.trim(country_code)

  true =
    :ets.insert(
      @ets_table_by_lookup,
      {{country_code, state_code, city_name}, {postal_code, latitude, longitude}}
    )

  true =
    :ets.insert(
      @ets_table_by_id,
      {postal_code, {country_code, state_code, city_name, latitude, longitude}}
    )
end

# Rows with an unexpected number of columns are ignored rather than crashing the load.
def parse(_data), do: :ok
172+
68173
@doc """
69174
Finds postal_code information by postal code.
70175
"""
@@ -99,6 +204,15 @@ defmodule Location.PostalCode do
99204
end
100205
end
101206

207+
@doc """
Returns every entry currently stored in the by-id ETS table, converted with
`to_struct/6`.

The result is a list with one element per stored postal code; it is empty when
nothing has been loaded. The whole table is copied into the calling process, so
this is intended for occasional bulk reads, not per-request lookups.
"""
@spec get_postal_codes() :: [%__MODULE__{}]
def get_postal_codes() do
  @ets_table_by_id
  |> :ets.tab2list()
  # Rows in the by-id table are keyed by postal code:
  # {postal_code, {country_code, state_code, city_name, latitude, longitude}}
  |> Enum.map(fn {postal_code, {country_code, state_code, city_name, latitude, longitude}} ->
    to_struct(postal_code, country_code, state_code, city_name, latitude, longitude)
  end)
end
215+
102216
defp source_file() do
103217
default = Application.app_dir(:location, "/priv/postal_codes.csv")
104218
Application.get_env(:location, :postal_codes_source_file, default)

lib/mix/tasks/update_geoname_data.ex

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ defmodule Mix.Tasks.Location.UpdateGeonameData do
1111
System.cmd("wget", [@allcountries_src, "-O", "/tmp/allCountries.zip"])
1212
zip_file = Unzip.LocalFile.open("/tmp/allCountries.zip")
1313
{:ok, unzip} = Unzip.new(zip_file)
14+
1415
Unzip.file_stream!(unzip, "allCountries.txt")
1516
|> Stream.into(File.stream!("/tmp/allCountries.txt"))
1617
|> Stream.run()

lib/mix/tasks/update_postal_code_data.ex

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@ defmodule Mix.Tasks.Location.UpdatePostalCodeData do
22
use Mix.Task
33
@shortdoc "Updates the postal code data from source"
44

5-
@destination_filename Application.compile_env(:location, :postal_codes_source_file, "priv/postal_codes.csv")
5+
@destination_filename Application.compile_env(
6+
:location,
7+
:postal_codes_source_file,
8+
"priv/postal_codes.csv"
9+
)
610

711
@doc """
812
The data source clocks in at 16mb. Expect this to take a while.
@@ -46,6 +50,7 @@ defmodule Mix.Tasks.Location.UpdatePostalCodeData do
4650

4751
zip_file = Unzip.LocalFile.open("/tmp/#{name}.zip")
4852
{:ok, unzip} = Unzip.new(zip_file)
53+
4954
Unzip.file_stream!(unzip, "#{name}.txt")
5055
|> Stream.into(File.stream!("/tmp/#{name}.txt"))
5156
|> Stream.run()
@@ -61,18 +66,15 @@ defmodule Mix.Tasks.Location.UpdatePostalCodeData do
6166
filename
6267
|> File.stream!(read_ahead: 100_000)
6368
|> Flow.from_enumerable()
64-
|> Flow.map(&String.split(&1, tab))
69+
|> Flow.map(&(String.trim(&1) |> String.split(tab)))
6570
|> Flow.partition()
6671
|> Enum.into([])
6772

6873
IO.puts("Writing result to #{@destination_filename}")
6974

7075
Location.Scraper.write_date_to_version()
7176

72-
case append do
73-
false -> File.write!(@destination_filename, Enum.join(result, "\n"))
74-
true -> File.write!(@destination_filename, Enum.join(result, "\n"), :append)
75-
end
76-
77+
file = File.open!(@destination_filename, [:write, :utf8])
78+
result |> CSV.encode() |> Enum.each(&IO.write(file, &1))
7779
end
7880
end

location.iml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,5 +77,6 @@
7777
<orderEntry type="library" name="unzip" level="project" />
7878
<orderEntry type="library" name="benchee_html" level="project" />
7979
<orderEntry type="library" name="mox" level="project" />
80+
<orderEntry type="library" name="csv" level="project" />
8081
</component>
8182
</module>

mix.exs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ defmodule Location.MixProject do
5252
{:tesla, "~> 1.8"},
5353
{:hackney, "~> 1.20"},
5454
{:flow, "~> 1.2"},
55-
{:unzip, "0.11.0"}
55+
{:unzip, "0.11.0"},
56+
{:csv, "~> 3.2"}
5657
]
5758
end
5859
end

mix.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
%{
22
"certifi": {:hex, :certifi, "2.14.0", "ed3bef654e69cde5e6c022df8070a579a79e8ba2368a00acf3d75b82d9aceeed", [:rebar3], [], "hexpm", "ea59d87ef89da429b8e905264fdec3419f84f2215bb3d81e07a18aac919026c3"},
3+
"csv": {:hex, :csv, "3.2.2", "452f96414b39a176b7c390af6d8b78f15130dc6167fe3b836729131f515d843e", [:mix], [], "hexpm", "cbf256ff74a3fa01d9ec420d07b19c90d410ed9fe5b6d6e1bc7662edf35bc574"},
34
"floki": {:hex, :floki, "0.35.4", "cc947b446024732c07274ac656600c5c4dc014caa1f8fb2dfff93d275b83890d", [:mix], [], "hexpm", "27fa185d3469bd8fc5947ef0f8d5c4e47f0af02eb6b070b63c868f69e3af0204"},
45
"flow": {:hex, :flow, "1.2.4", "1dd58918287eb286656008777cb32714b5123d3855956f29aa141ebae456922d", [:mix], [{:gen_stage, "~> 1.0", [hex: :gen_stage, repo: "hexpm", optional: false]}], "hexpm", "874adde96368e71870f3510b91e35bc31652291858c86c0e75359cbdd35eb211"},
56
"gen_stage": {:hex, :gen_stage, "1.2.1", "19d8b5e9a5996d813b8245338a28246307fd8b9c99d1237de199d21efc4c76a1", [:mix], [], "hexpm", "83e8be657fa05b992ffa6ac1e3af6d57aa50aace8f691fcf696ff02f8335b001"},

priv/postal_codes.csv

Whitespace-only changes.

0 commit comments

Comments
 (0)