Skip to content

Commit 4383e1b

Browse files
authored
Add max_size extraction limit for zip bomb protection (#168)
Backport {max_size, N} option from OTP's erl_tar to hex_erl_tar for zip bomb protection. The option limits cumulative extracted size and pre-checks compressed binary inflation. Wire up tarball_max_uncompressed_size and docs_tarball_max_uncompressed_size config options during unpack to enforce limits on inner tarball extraction.
1 parent a71bea5 commit 4383e1b

4 files changed

Lines changed: 216 additions & 24 deletions

File tree

src/hex_erl_tar.erl

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
%% instead of loading them fully into memory
99
%% 6. Default chunk_size to 65536 in add_opts instead of 0 with special case
1010
%% 7. Use compressed instead of compressed_one for file:open for OTP 24 compat
11+
%% 8. Added {max_size, N} extraction option for zip bomb protection
1112
%%
1213
%% OTP commit: 013041bd68c2547848e88963739edea7f0a1a90f
1314
%%
@@ -87,6 +88,8 @@ format_error({invalid_gnu_0_1_sparsemap, Format}) ->
8788
lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format]));
8889
format_error(unsafe_path) ->
8990
"The path points above the current working directory";
91+
format_error(too_big) ->
92+
"Extraction size exceeds the configured max_size limit";
9093
format_error({Name,Reason}) ->
9194
lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)]));
9295
format_error(Atom) when is_atom(Atom) ->
@@ -138,9 +141,80 @@ extract(Name, Opts) when is_list(Name); is_binary(Name), is_list(Opts) ->
138141

139142
do_extract(Handle, Opts) when is_list(Opts) ->
140143
Opts2 = extract_opts(Opts),
141-
Acc = if Opts2#read_opts.output =:= memory -> []; true -> ok end,
142-
foldl_read(Handle, fun extract1/4, Acc, Opts2).
144+
case maybe_inflate_with_limit(Handle, Opts2) of
145+
{error, _} = Err ->
146+
Err;
147+
{ok, Handle2, Opts3} ->
148+
Acc0 = if Opts3#read_opts.output =:= memory -> []; true -> ok end,
149+
Acc = case Opts3#read_opts.max_size of
150+
infinity -> Acc0;
151+
_ -> {size_tracked, 0, Acc0}
152+
end,
153+
foldl_read(Handle2, fun extract1/4, Acc, Opts3)
154+
end.
155+
156+
%% Pre-inflates an in-memory archive under the max_size cap.
%%
%% Only acts when the handle is {binary, Data}, max_size is an integer and
%% `compressed' is present in open_mode. In that case the data is inflated
%% through inflate_with_limit/2 and `compressed' is dropped from open_mode,
%% because the handle passed on already carries the inflated bytes.
%% Every other handle/option combination is returned untouched.
%%
%% Returns {ok, Handle, Opts} or {error, too_big}.
maybe_inflate_with_limit({binary, Data} = Handle,
                         #read_opts{max_size = Limit, open_mode = Mode} = Opts)
  when is_integer(Limit), is_binary(Data) ->
    case lists:member(compressed, Mode) of
        false ->
            %% Not flagged as compressed: nothing to pre-check here.
            {ok, Handle, Opts};
        true ->
            case inflate_with_limit(Data, Limit) of
                {error, too_big} = TooBig ->
                    TooBig;
                {ok, Plain} ->
                    {ok, {binary, Plain},
                     Opts#read_opts{open_mode = Mode -- [compressed]}}
            end
    end;
maybe_inflate_with_limit(Handle, Opts) ->
    {ok, Handle, Opts}.
172+
173+
%% Inflates Bin with a fresh zlib stream, stopping as soon as the inflated
%% size would exceed MaxSize.
%%
%% The stream is initialised with window bits 31 (15 + 16, which zlib uses
%% to select gzip framing) and the `cut' end-of-stream behaviour.
%%
%% Returns whatever inflate_with_limit_loop/5 returns ({ok, Inflated} or
%% {error, too_big}). Any exception raised while initialising or inflating
%% is turned into {ok, Bin}, i.e. the input is handed back unchanged --
%% NOTE(review): presumably so input that is not actually gzip data still
%% reaches the tar reader; confirm this fall-back is intended for all errors.
%% The zlib stream is always closed.
inflate_with_limit(Bin, MaxSize) ->
    Stream = zlib:open(),
    try
        zlib:inflateInit(Stream, 31, cut),
        inflate_with_limit_loop(Stream, Bin, MaxSize, 0, [])
    of
        Outcome -> Outcome
    catch
        _:_ -> {ok, Bin}
    after
        zlib:close(Stream)
    end.
143183

184+
%% Drives zlib:safeInflate/2 until it reports `finished', keeping a running
%% count of inflated bytes.
%%
%%   Z       - zlib stream already initialised for inflation
%%   Bin     - input to queue on this call (<<>> on follow-up calls)
%%   MaxSize - upper bound on the total inflated size
%%   Total   - bytes inflated by previous iterations
%%   Acc     - previously inflated chunks, most recent first
%%
%% Returns {ok, Binary} with all chunks in order, or {error, too_big} as
%% soon as the running total exceeds MaxSize.
inflate_with_limit_loop(Z, Bin, MaxSize, Total, Acc) ->
    case zlib:safeInflate(Z, Bin) of
        {Status, Chunks} when Status =:= finished; Status =:= continue ->
            Inflated = Total + iolist_size(Chunks),
            inflate_with_limit_next(Status, Z, MaxSize, Inflated, Chunks, Acc)
    end.

%% Decides what to do with one safeInflate result: abort when over the
%% limit, assemble the final binary on `finished', or loop on `continue'.
inflate_with_limit_next(_Status, _Z, MaxSize, Inflated, _Chunks, _Acc)
  when Inflated > MaxSize ->
    {error, too_big};
inflate_with_limit_next(finished, _Z, _MaxSize, _Inflated, Chunks, Acc) ->
    %% Acc is newest-first; reversing onto the last chunk restores order.
    {ok, iolist_to_binary(lists:reverse(Acc, Chunks))};
inflate_with_limit_next(continue, Z, MaxSize, Inflated, Chunks, Acc) ->
    %% All input was queued on the first call, so feed nothing new.
    inflate_with_limit_loop(Z, <<>>, MaxSize, Inflated, [Chunks | Acc]).
199+
200+
extract1(eof, Reader, _, {size_tracked, _, Acc}) when is_list(Acc) ->
201+
{ok, {ok, lists:reverse(Acc)}, Reader};
202+
extract1(eof, Reader, _, {size_tracked, _, leading_slash}) ->
203+
error_logger:info_msg("erl_tar: removed leading '/' from member names\n"),
204+
{ok, ok, Reader};
205+
extract1(eof, Reader, _, {size_tracked, _, Acc}) ->
206+
{ok, Acc, Reader};
207+
extract1(#tar_header{size=Size}=Header, Reader0, Opts,
208+
{size_tracked, Total, InnerAcc}) ->
209+
NewTotal = Total + Size,
210+
case NewTotal > Opts#read_opts.max_size of
211+
true -> throw({error, too_big});
212+
false -> ok
213+
end,
214+
case extract1(Header, Reader0, Opts, InnerAcc) of
215+
{ok, NewInnerAcc, Reader1} ->
216+
{ok, {size_tracked, NewTotal, NewInnerAcc}, Reader1}
217+
end;
144218
extract1(eof, Reader, _, Acc) when is_list(Acc) ->
145219
{ok, {ok, lists:reverse(Acc)}, Reader};
146220
extract1(eof, Reader, _, leading_slash) ->
@@ -2085,6 +2159,8 @@ extract_opts([verbose|Rest], Opts) ->
20852159
extract_opts(Rest, Opts#read_opts{verbose=true});
20862160
extract_opts([{chunks,N}|Rest], Opts) ->
20872161
extract_opts(Rest, Opts#read_opts{chunk_size=N});
2162+
extract_opts([{max_size,N}|Rest], Opts) ->
2163+
extract_opts(Rest, Opts#read_opts{max_size=N});
20882164
extract_opts([Other|Rest], Opts) ->
20892165
extract_opts(Rest, read_opts([Other], Opts));
20902166
extract_opts([], Opts) ->

src/hex_erl_tar.hrl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
%% 1. Added chunk_size field to #read_opts{} for streaming extraction to disk
33
%% 2. Added {chunks, pos_integer()} to extract_opt() type
44
%% 3. Default chunk_size to 65536 in #add_opts{} instead of 0
5+
%% 4. Added max_size field to #read_opts{} for zip bomb protection
6+
%% 5. Added {max_size, pos_integer() | infinity} to extract_opt() type
57
%%
68
%% OTP commit: 013041bd68c2547848e88963739edea7f0a1a90f
79
%%
@@ -46,7 +48,8 @@
4648
output = file :: 'file' | 'memory',
4749
open_mode = [], %% Open mode options.
4850
verbose = false :: boolean(), %% Verbose on/off.
49-
chunk_size = 65536}). %% Chunk size for streaming to disk.
51+
chunk_size = 65536, %% Chunk size for streaming to disk.
52+
max_size = infinity :: pos_integer() | 'infinity'}).
5053
-type read_opts() :: #read_opts{}.
5154

5255
-type add_opt() :: dereference |
@@ -64,6 +67,7 @@
6467
-type extract_opt() :: {cwd, string()} |
6568
{files, [name_in_archive()]} |
6669
{chunks, pos_integer()} |
70+
{max_size, pos_integer() | infinity} |
6771
compressed |
6872
cooked |
6973
memory |

src/hex_tarball.erl

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ unpack(Input, memory, Config) ->
219219
{ok, FileList} ->
220220
case validate_outer_file_sizes(maps:from_list(FileList)) of
221221
{ok, Files} ->
222-
do_unpack(Files, OuterChecksum, memory);
222+
do_unpack(Files, OuterChecksum, memory, Config);
223223
{error, _} = Error ->
224224
Error
225225
end;
@@ -241,7 +241,7 @@ unpack(Input, Output, Config) ->
241241
ok ->
242242
case read_outer_files(TmpDir) of
243243
{ok, Files} ->
244-
do_unpack(Files, OuterChecksum, Output);
244+
do_unpack(Files, OuterChecksum, Output, Config);
245245
{error, _} = Error ->
246246
Error
247247
end;
@@ -308,7 +308,8 @@ unpack(Tarball, Output) ->
308308
unpack_docs(Input, Output, Config) ->
309309
case check_docs_input_size(Input, Config) of
310310
true ->
311-
unpack_tarball(tar_source(Input), Output);
311+
MaxSize = maps:get(docs_tarball_max_uncompressed_size, Config),
312+
unpack_tarball(tar_source(Input), Output, MaxSize);
312313
false ->
313314
{error, {tarball, too_big}}
314315
end.
@@ -414,14 +415,15 @@ encode_metadata(Meta) ->
414415
iolist_to_binary(Data).
415416

416417
%% @private
417-
do_unpack(Files, OuterChecksum, Output) ->
418+
do_unpack(Files, OuterChecksum, Output, Config) ->
418419
State = #{
419420
inner_checksum => undefined,
420421
outer_checksum => OuterChecksum,
421422
contents => undefined,
422423
files => Files,
423424
metadata => undefined,
424-
output => Output
425+
output => Output,
426+
config => Config
425427
},
426428
State1 = check_files(State),
427429
State2 = check_version(State1),
@@ -437,10 +439,12 @@ finish_unpack(#{
437439
files := Files,
438440
inner_checksum := InnerChecksum,
439441
outer_checksum := OuterChecksum,
440-
output := Output
442+
output := Output,
443+
config := Config
441444
}) ->
442445
_ = maps:get("VERSION", Files),
443446
Contents = maps:get("contents.tar.gz", Files),
447+
MaxUncompressedSize = maps:get(tarball_max_uncompressed_size, Config),
444448

445449
Result = #{
446450
inner_checksum => InnerChecksum,
@@ -452,15 +456,15 @@ finish_unpack(#{
452456
none ->
453457
{ok, Result};
454458
memory ->
455-
case unpack_contents(Contents, memory) of
459+
case unpack_contents(Contents, memory, MaxUncompressedSize) of
456460
{ok, UnpackedContents} ->
457461
{ok, Result#{contents => UnpackedContents}};
458462
{error, Reason} ->
459463
{error, {inner_tarball, Reason}}
460464
end;
461465
_ ->
462466
filelib:ensure_dir(filename:join(Output, "*")),
463-
case unpack_contents(Contents, Output) of
467+
case unpack_contents(Contents, Output, MaxUncompressedSize) of
464468
ok ->
465469
[
466470
try_updating_mtime(filename:join(Output, P))
@@ -474,14 +478,23 @@ finish_unpack(#{
474478
end.
475479

476480
%% @private
477-
unpack_contents({path, ContentsPath}, memory) ->
478-
hex_erl_tar:extract(ContentsPath, [memory, compressed]);
479-
unpack_contents({path, ContentsPath}, Output) ->
480-
hex_erl_tar:extract(ContentsPath, [{cwd, Output}, compressed]);
481-
unpack_contents(ContentsBinary, memory) ->
482-
hex_erl_tar:extract({binary, ContentsBinary}, [memory, compressed]);
483-
unpack_contents(ContentsBinary, Output) ->
484-
hex_erl_tar:extract({binary, ContentsBinary}, [{cwd, Output}, compressed]).
481+
%% Extracts the inner contents tarball with a cap on the extracted size.
%%
%%   Contents - either {path, Path} (extract from that file) or a binary
%%              (extract from memory)
%%   Output   - `memory', or a directory that is passed as {cwd, Dir}
%%   MaxSize  - forwarded to hex_erl_tar:extract/2 as {max_size, MaxSize}
%%
%% Returns the result of hex_erl_tar:extract/2 unchanged, except that
%% {error, too_big} becomes {error, {too_big_uncompressed, MaxSize}}.
unpack_contents(Contents, Output, MaxSize) ->
    TarSource =
        case Contents of
            {path, Path} -> Path;
            Binary -> {binary, Binary}
        end,
    Destination =
        case Output of
            memory -> memory;
            Dir -> {cwd, Dir}
        end,
    ExtractOpts = [{max_size, MaxSize}, Destination, compressed],
    case hex_erl_tar:extract(TarSource, ExtractOpts) of
        {error, too_big} ->
            {error, {too_big_uncompressed, MaxSize}};
        Result ->
            Result
    end.
485498

486499
%% @private
487500
copy_metadata_config(Output, MetadataBinary) ->
@@ -617,17 +630,24 @@ guess_build_tools(Metadata) ->
617630
%%====================================================================
618631

619632
%% @private
620-
unpack_tarball(Source, memory) ->
621-
hex_erl_tar:extract(Source, [memory, compressed]);
622-
unpack_tarball(Source, Output) ->
633+
unpack_tarball(Source, memory, MaxSize) ->
634+
case hex_erl_tar:extract(Source, [memory, compressed, {max_size, MaxSize}]) of
635+
{error, too_big} ->
636+
{error, {tarball, {too_big_uncompressed, MaxSize}}};
637+
Other ->
638+
Other
639+
end;
640+
unpack_tarball(Source, Output, MaxSize) ->
623641
filelib:ensure_dir(filename:join(Output, "*")),
624-
case hex_erl_tar:extract(Source, [{cwd, Output}, compressed]) of
642+
case hex_erl_tar:extract(Source, [{cwd, Output}, compressed, {max_size, MaxSize}]) of
625643
ok ->
626644
[
627645
try_updating_mtime(filename:join(Output, Path))
628646
|| Path <- filelib:wildcard("**", Output)
629647
],
630648
ok;
649+
{error, too_big} ->
650+
{error, {tarball, {too_big_uncompressed, MaxSize}}};
631651
Other ->
632652
Other
633653
end.

test/hex_tarball_SUITE.erl

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@ all() ->
3333
streamed_extract_test,
3434
file_unpack_docs_memory_test,
3535
file_unpack_docs_disk_test,
36-
file_unpack_docs_too_big_test
36+
file_unpack_docs_too_big_test,
37+
too_big_uncompressed_to_unpack_test,
38+
docs_too_big_uncompressed_to_unpack_test,
39+
file_unpack_too_big_uncompressed_test,
40+
file_unpack_docs_too_big_uncompressed_test
3741
].
3842

3943
too_big_to_create_test(_Config) ->
@@ -703,6 +707,94 @@ file_unpack_docs_too_big_test(Config) ->
703707

704708
ok.
705709

710+
%% Unpacking an in-memory package tarball must fail with
%% too_big_uncompressed when tarball_max_uncompressed_size is 1, for both
%% memory and disk output, and succeed with a 10 MiB limit.
too_big_uncompressed_to_unpack_test(CtConfig) ->
    Metadata = #{<<"name">> => <<"foo">>, <<"version">> => <<"1.0.0">>},
    Contents = [{"src/foo.erl", <<"-module(foo).">>}],
    {ok, #{tarball := Tarball}} = hex_tarball:create(Metadata, Contents),
    Defaults = hex_core:default_config(),
    UnpackDir = filename:join(?config(priv_dir, CtConfig), "too_big_uncompressed"),

    %% Limit of one byte: rejected for memory output first, then disk output.
    Tiny = Defaults#{tarball_max_uncompressed_size => 1},
    Rejected = {error, {inner_tarball, {too_big_uncompressed, 1}}},
    lists:foreach(
        fun(Output) ->
            Rejected = hex_tarball:unpack(Tarball, Output, Tiny)
        end,
        [memory, UnpackDir]
    ),

    %% Generous limit: the same tarball unpacks fine.
    Roomy = Defaults#{tarball_max_uncompressed_size => 10 * 1024 * 1024},
    {ok, _} = hex_tarball:unpack(Tarball, memory, Roomy),
    ok.
733+
734+
%% Unpacking an in-memory docs tarball must fail with too_big_uncompressed
%% when docs_tarball_max_uncompressed_size is 1, for both memory and disk
%% output, and succeed with a 10 MiB limit.
docs_too_big_uncompressed_to_unpack_test(CtConfig) ->
    {ok, Tarball} = hex_tarball:create_docs([{"index.html", <<"Docs">>}]),
    Defaults = hex_core:default_config(),
    UnpackDir = filename:join(?config(priv_dir, CtConfig), "docs_too_big_uncompressed"),

    %% Limit of one byte: rejected for memory output first, then disk output.
    Tiny = Defaults#{docs_tarball_max_uncompressed_size => 1},
    Rejected = {error, {tarball, {too_big_uncompressed, 1}}},
    lists:foreach(
        fun(Output) ->
            Rejected = hex_tarball:unpack_docs(Tarball, Output, Tiny)
        end,
        [memory, UnpackDir]
    ),

    %% Generous limit: the same tarball unpacks fine.
    Roomy = Defaults#{docs_tarball_max_uncompressed_size => 10 * 1024 * 1024},
    {ok, _} = hex_tarball:unpack_docs(Tarball, memory, Roomy),
    ok.
755+
756+
%% Same check as the in-memory variant, but the package tarball is first
%% written to disk and unpacked through a {file, Path} source.
file_unpack_too_big_uncompressed_test(Config) ->
    PrivDir = ?config(priv_dir, Config),
    Metadata = #{<<"name">> => <<"foo">>, <<"version">> => <<"1.0.0">>},
    Contents = [{"src/foo.erl", <<"-module(foo).">>}],
    {ok, #{tarball := Tarball}} = hex_tarball:create(Metadata, Contents),

    TarballPath = filename:join(PrivDir, "test_file_too_big_uncompressed.tar"),
    ok = file:write_file(TarballPath, Tarball),
    Source = {file, TarballPath},

    SmallConfig = (hex_core:default_config())#{tarball_max_uncompressed_size => 1},
    UnpackDir = filename:join(PrivDir, "file_unpack_too_big_uncompressed"),
    Rejected = {error, {inner_tarball, {too_big_uncompressed, 1}}},

    %% Memory unpack from file first, then disk unpack from file.
    lists:foreach(
        fun(Output) ->
            Rejected = hex_tarball:unpack(Source, Output, SmallConfig)
        end,
        [memory, UnpackDir]
    ),
    ok.
778+
779+
%% Same check as the in-memory docs variant, but the docs tarball is first
%% written to disk and unpacked through a {file, Path} source.
file_unpack_docs_too_big_uncompressed_test(Config) ->
    PrivDir = ?config(priv_dir, Config),
    {ok, Tarball} = hex_tarball:create_docs([{"index.html", <<"Docs">>}]),

    TarballPath = filename:join(PrivDir, "docs_big_uncompressed.tar.gz"),
    ok = file:write_file(TarballPath, Tarball),
    Source = {file, TarballPath},

    SmallConfig = (hex_core:default_config())#{docs_tarball_max_uncompressed_size => 1},
    UnpackDir = filename:join(PrivDir, "file_unpack_docs_too_big_uncompressed"),
    Rejected = {error, {tarball, {too_big_uncompressed, 1}}},

    %% Memory unpack from file first, then disk unpack from file.
    lists:foreach(
        fun(Output) ->
            Rejected = hex_tarball:unpack_docs(Source, Output, SmallConfig)
        end,
        [memory, UnpackDir]
    ),
    ok.
797+
706798
%%====================================================================
707799
%% Helpers
708800
%%====================================================================

0 commit comments

Comments
 (0)