Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
*/
package org.apache.avro.file;

import java.io.IOException;
import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
Expand All @@ -29,9 +29,9 @@
import org.apache.avro.InvalidAvroMagicException;
import org.apache.avro.Schema;
import org.apache.avro.UnknownAvroCodecException;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.BinaryDecoder;

/** Read files written by Avro version 1.2. */
public class DataFileReader12<D> implements FileReader<D>, Closeable {
Expand Down Expand Up @@ -88,7 +88,7 @@ public DataFileReader12(SeekableInput sin, DatumReader<D> reader) throws IOExcep
if (codec != null && !codec.equals(NULL_CODEC)) {
throw new UnknownAvroCodecException("Unknown codec: " + codec);
}
this.schema = new Schema.Parser().parse(getMetaString(SCHEMA));
this.schema = parseSchema();
this.reader = reader;

reader.setSchema(schema);
Expand All @@ -115,6 +115,10 @@ public synchronized long getMetaLong(String key) {
return Long.parseLong(getMetaString(key));
}

/**
 * Parses this file's schema out of its metadata.
 *
 * @return the schema recorded under the {@code SCHEMA} metadata key
 * @throws IOException if the schema metadata entry is missing or cannot be parsed
 */
private Schema parseSchema() throws IOException {
  String schemaJson = getMetaString(SCHEMA);
  return DataFileStream.parseSchemaFromMetadata(schemaJson, SCHEMA, new Schema.Parser());
}

/** Return the schema used in this file. */
@Override
public Schema getSchema() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,7 @@ void initialize(InputStream in, byte[] magic) throws IOException {

// finalize the header
header.metaKeyList = Collections.unmodifiableList(header.metaKeyList);
header.schema = new Schema.Parser(NameValidator.NO_VALIDATION).setValidateDefaults(false)
.parse(getMetaString(DataFileConstants.SCHEMA));
header.schema = parseHeaderSchema();
this.codec = resolveCodec();
reader.setSchema(header.schema);
}
Expand Down Expand Up @@ -198,6 +197,23 @@ public long getMetaLong(String key) {
return Long.parseLong(getMetaString(key));
}

/**
 * Parses a schema JSON string taken from container-file metadata, converting the
 * failure modes into {@link IOException}s that identify the offending metadata key.
 *
 * @param schemaJson        the raw schema JSON from the file metadata, possibly null
 * @param schemaMetadataKey the metadata key the JSON was read from (used in error messages)
 * @param parser            the parser to interpret the JSON with
 * @return the parsed schema
 * @throws IOException if {@code schemaJson} is null or the parser rejects it
 */
static Schema parseSchemaFromMetadata(String schemaJson, String schemaMetadataKey, Schema.Parser parser)
    throws IOException {
  if (schemaJson == null) {
    throw new IOException("Missing required metadata: " + schemaMetadataKey);
  }
  final Schema parsed;
  try {
    parsed = parser.parse(schemaJson);
  } catch (AvroRuntimeException e) {
    // Surface parse failures as I/O errors, keeping the original cause attached.
    throw new IOException("Invalid schema in metadata: " + schemaMetadataKey, e);
  }
  return parsed;
}

/**
 * Parses the schema stored in this file's header metadata, using a lenient parser
 * (no name validation, defaults not validated) so that files written by older or
 * non-conforming writers can still be read.
 *
 * @return the header schema
 * @throws IOException if the schema metadata entry is missing or invalid
 */
private Schema parseHeaderSchema() throws IOException {
  Schema.Parser lenientParser = new Schema.Parser(NameValidator.NO_VALIDATION).setValidateDefaults(false);
  String schemaJson = getMetaString(DataFileConstants.SCHEMA);
  return parseSchemaFromMetadata(schemaJson, DataFileConstants.SCHEMA, lenientParser);
}

/**
* Returns an iterator over entries in this file. Note that this iterator is
* shared with other users of the file: it does not contain a separate pointer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,25 @@
import com.sun.management.UnixOperatingSystemMXBean;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.OperatingSystemMXBean;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.file.FileReader;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.file.SeekableFileInput;
import org.apache.avro.file.SeekableInput;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

Expand Down Expand Up @@ -238,4 +244,36 @@ void invalidMagicBytes() throws IOException {
() -> DataFileReader.openReader(fileInput, new GenericDatumReader<>()));
}
}

@Test
void missingSchemaMetadataDoesNotThrowNullPointerException() throws IOException {
  // A header whose metadata map has no avro.schema entry must fail with a
  // descriptive IOException, not an NPE, for both read paths.
  byte[] headerWithoutSchema = buildContainerHeaderWithoutSchema();

  IOException fromStream = assertThrows(IOException.class,
      () -> new DataFileStream<>(new ByteArrayInputStream(headerWithoutSchema), new GenericDatumReader<>()));
  String streamMessage = fromStream.getMessage();
  assertNotNull(streamMessage);
  assertTrue(streamMessage.contains(DataFileConstants.SCHEMA));

  IOException fromReader = assertThrows(IOException.class,
      () -> new DataFileReader<>(new SeekableByteArrayInput(headerWithoutSchema), new GenericDatumReader<>()));
  String readerMessage = fromReader.getMessage();
  assertNotNull(readerMessage);
  assertTrue(readerMessage.contains(DataFileConstants.SCHEMA));
}

private static byte[] buildContainerHeaderWithoutSchema() throws IOException {
ByteArrayOutputStream output = new ByteArrayOutputStream();
output.write(DataFileConstants.MAGIC);

BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(output, null);
encoder.writeMapStart();
encoder.setItemCount(1);
encoder.startItem();
encoder.writeString(DataFileConstants.CODEC);
encoder.writeBytes("null".getBytes());
encoder.writeMapEnd();
Comment on lines +271 to +273
encoder.writeFixed(new byte[DataFileConstants.SYNC_SIZE]);
encoder.flush();

return output.toByteArray();
}
}
Loading