Skip to content
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions src/buffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,18 @@ mutable struct Buffer
# the total number of transcoded bytes
transcoded::Int64

function Buffer(size::Integer)
return new(Vector{UInt8}(undef, size), 0, 1, 1, 0)
function Buffer(data::Vector{UInt8}, keepbytes::Integer=length(data))
Comment thread
baumgold marked this conversation as resolved.
Outdated
0 <= keepbytes <= length(data) || throw(ArgumentError("invalid keepbytes: keepbytes must be 0 ≤ keepbytes ≤ length(data), got $keepbytes and length(data)=$(length(data))"))
return new(data, 0, 1, keepbytes+1, 0)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like that we now have a single inner constructor.

Could we add a sanity check here that `keepbytes` is nonnegative and not greater than the length of `data`?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

end
end

function Buffer(data::Vector{UInt8})
return new(data, 0, 1, length(data)+1, 0)
end
# Outer convenience constructor: wrap a fresh, uninitialized `size`-byte vector
# and pass `keepbytes = 0` to the two-argument `Buffer` constructor.
Buffer(size::Integer = 0) = Buffer(Vector{UInt8}(undef, size), 0)

function Buffer(data::Base.CodeUnits{UInt8})
return Buffer(Vector{UInt8}(data))
function Buffer(data::Base.CodeUnits{UInt8}, keepbytes::Integer=length(data))
    # Copy the code units into a plain byte vector, then forward `keepbytes`
    # unchanged to the `Vector{UInt8}` constructor.
    bytes = Vector{UInt8}(data)
    return Buffer(bytes, keepbytes)
end

function Base.length(buf::Buffer)
Expand Down Expand Up @@ -199,6 +200,11 @@ function copydata!(buf::Buffer, data::Ptr{UInt8}, nbytes::Integer)
return buf
end

# Copy data from `data` to `buf`.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we make this a docstring?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really want this? All these things are internal.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my mind, it's an internal thing. If you're a user, you don't care about internals. If you care about internals you might as well read the code comments.

That said, if you feel strongly about this I can make them docstrings :-)

# Buffer-to-buffer variant: copy `nbytes` bytes (`length(data)` by default) into
# `buf` by forwarding `bufferptr(data)` to another `copydata!` method.
function copydata!(buf::Buffer, data::Buffer, nbytes::Integer = length(data))
    src = bufferptr(data)
    return copydata!(buf, src, nbytes)
end

# Copy data from `buf` to `data`.
function copydata!(data::Ptr{UInt8}, buf::Buffer, nbytes::Integer)
# NOTE: It's the caller's responsibility to ensure that the buffer has at least
Expand Down
7 changes: 3 additions & 4 deletions src/noop.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,9 @@ function Base.transcode(::Type{Noop}, data::ByteData)
return transcode(Noop(), data)
end

function Base.transcode(::Noop, data::ByteData)
# Copy the data because the caller may expect the returned object to be
# distinct from the input.
return Vector{UInt8}(data)
function Base.transcode(codec::Noop, input::Buffer, output::Buffer = Buffer())
copydata!(output, input)
return output.data
end


Expand Down
60 changes: 52 additions & 8 deletions src/transcode.jl
Comment thread
baumgold marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
# =========

"""
transcode(::Type{C}, data::Vector{UInt8})::Vector{UInt8} where C<:Codec
transcode(
::Type{C},
data::Union{Vector{UInt8},Base.CodeUnits{UInt8}},
)::Vector{UInt8} where {C<:Codec}

Transcode `data` by applying a codec `C()`.

Expand All @@ -27,21 +30,34 @@ julia> String(decompressed)

```
"""
function Base.transcode(::Type{C}, data::ByteData) where C<:Codec
function Base.transcode(::Type{C}, args...) where {C<:Codec}
codec = C()
initialize(codec)
try
return transcode(codec, data)
return transcode(codec, args...)
finally
finalize(codec)
end
end

# Allocate the output buffer used when the caller does not supply one; its size
# is `initial_output_size(codec, buffermem(input))`.
function _default_output_buffer(codec, input)
    nbytes = initial_output_size(codec, buffermem(input))
    return Buffer(nbytes)
end

"""
Comment thread
baumgold marked this conversation as resolved.
transcode(codec::Codec, data::Vector{UInt8})::Vector{UInt8}
transcode(
codec::Codec,
data::Union{Vector{UInt8},Base.CodeUnits{UInt8},Buffer},
[output::Union{Vector{UInt8},Base.CodeUnits{UInt8},Buffer}],
)::Vector{UInt8}

Transcode `data` by applying `codec`.

If `output` is unspecified, then this method will allocate it.

Note that this method does not initialize or finalize `codec`. This is
efficient when you transcode a number of pieces of data, but you need to call
[`TranscodingStreams.initialize`](@ref) and
Expand All @@ -59,7 +75,9 @@ julia> codec = ZlibCompressor();

julia> TranscodingStreams.initialize(codec)

julia> compressed = transcode(codec, data);
julia> compressed = Vector{UInt8}();

julia> transcode(codec, data, compressed);

julia> TranscodingStreams.finalize(codec)

Expand All @@ -76,9 +94,29 @@ julia> String(decompressed)

```
"""
Comment thread
baumgold marked this conversation as resolved.
function Base.transcode(codec::Codec, data::ByteData)
input = Buffer(data)
output = Buffer(initial_output_size(codec, buffermem(input)))
function Base.transcode(
    codec::Codec,
    input::Buffer,
    output::Union{Buffer,Nothing} = nothing,
)
    # Allocate an output buffer when none is given; otherwise pass the caller's
    # buffer through `initbuffer!` before reuse (presumably this resets its
    # positions — confirm against buffer.jl).
    if output === nothing
        output = _default_output_buffer(codec, input)
    else
        output = initbuffer!(output)
    end
    # `codec` is neither initialized nor finalized here; that is up to the caller.
    return transcode!(output, codec, input)
end

"""
transcode!(output::Buffer, codec::Codec, input::Buffer)
Comment thread
baumgold marked this conversation as resolved.

Transcode `input` by applying `codec` and storing the results in `output`.
Note that this method does not initialize or finalize `codec`. This is
efficient when you transcode a number of pieces of data, but you need to call
[`TranscodingStreams.initialize`](@ref) and
[`TranscodingStreams.finalize`](@ref) explicitly.
"""
function transcode!(
output::Buffer,
codec::Codec,
input::Buffer,
)
Comment thread
baumgold marked this conversation as resolved.
Comment thread
baumgold marked this conversation as resolved.
error = Error()
code = startproc(codec, :write, error)
if code === :error
Expand Down Expand Up @@ -121,6 +159,12 @@ function Base.transcode(codec::Codec, data::ByteData)
throw(error[])
end

# Accept raw byte data as the output: wrap it in a `Buffer` and re-dispatch.
function Base.transcode(codec::Codec, data::Buffer, output::ByteData)
    return transcode(codec, data, Buffer(output))
end

# Accept raw byte data as the input: wrap it in a `Buffer` and re-dispatch,
# forwarding any extra positional arguments untouched.
function Base.transcode(codec::Codec, data::ByteData, args...)
    return transcode(codec, Buffer(data), args...)
end

# Return the initial output buffer size.
function initial_output_size(codec::Codec, input::Memory)
return max(
Expand Down
17 changes: 17 additions & 0 deletions test/codecnoop.jl
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,26 @@
# Empty input.
data = b""
# Without an explicit output, the result equals the input but is a distinct object.
@test transcode(Noop(), data) == data
@test transcode(Noop(), data) !== data
# With an explicit output (a vector or a Buffer), the result still equals the
# input and is not the input object itself.
@test transcode(Noop(), data, Vector{UInt8}()) == data
@test transcode(Noop(), data, TranscodingStreams.Buffer(Vector{UInt8}())) == data
@test transcode(Noop(), data, Vector{UInt8}()) !== data
@test transcode(Noop(), data, TranscodingStreams.Buffer(Vector{UInt8}())) !== data
# The returned object is the caller-supplied vector itself ...
output = Vector{UInt8}()
@test transcode(Noop(), data, output) === output
# ... or, for a Buffer output, that buffer's `data` vector.
output = TranscodingStreams.Buffer(Vector{UInt8}())
@test transcode(Noop(), data, output) === output.data

# Non-empty input: the same checks as above.
data = b"foo"
@test transcode(Noop(), data) == data
@test transcode(Noop(), data) !== data
@test transcode(Noop(), data, Vector{UInt8}()) == data
@test transcode(Noop(), data, TranscodingStreams.Buffer(Vector{UInt8}())) == data
@test transcode(Noop(), data, Vector{UInt8}()) !== data
@test transcode(Noop(), data, TranscodingStreams.Buffer(Vector{UInt8}())) !== data
output = Vector{UInt8}()
@test transcode(Noop(), data, output) === output
output = TranscodingStreams.Buffer(Vector{UInt8}())
@test transcode(Noop(), data, output) === output.data

TranscodingStreams.test_roundtrip_transcode(Noop, Noop)
TranscodingStreams.test_roundtrip_read(NoopStream, NoopStream)
Expand Down