Skip to content

Commit 0011f3c

Browse files
committed
AVRO-3527: codegen equals and hashCode for Records
Update the compiler to generate the implementations of the `.equals()` and `.hashCode()` functions, instead of relying on the implementation in GenericData. This improves the performance of those functions significantly. The generated implementations are a factor of 10 to 20 faster for `.equals()` and a factor of 5 to 10 faster for `.hashCode()`.

Result of the perf test before the change:
```
Benchmark              Mode  Cnt          Score           Error  Units
SpecficTest.equals    thrpt    3   12598610.194 +/-  11160265.279  ops/s
SpecficTest.hashCode  thrpt    3   24729446.862 +/-  29051332.794  ops/s
```

Results using the generated functions:
```
Benchmark              Mode  Cnt          Score           Error  Units
SpecficTest.equals    thrpt    3  211314296.950 +/- 104154793.126  ops/s
SpecficTest.hashCode  thrpt    3  180349506.632 +/- 143639246.771  ops/s
```

Signed-off-by: Steven Aerts <steven.aerts@gmail.com>
1 parent df7a2c5 commit 0011f3c

12 files changed

Lines changed: 361 additions & 3 deletions

File tree

lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,9 +1181,9 @@ protected int compare(Object o1, Object o2, Schema s, boolean equals) {
11811181
case NULL:
11821182
return 0;
11831183
case STRING:
1184-
Utf8 u1 = o1 instanceof Utf8 ? (Utf8) o1 : new Utf8(o1.toString());
1185-
Utf8 u2 = o2 instanceof Utf8 ? (Utf8) o2 : new Utf8(o2.toString());
1186-
return u1.compareTo(u2);
1184+
CharSequence cs1 = o1 instanceof CharSequence ? (CharSequence) o1 : o1.toString();
1185+
CharSequence cs2 = o2 instanceof CharSequence ? (CharSequence) o2 : o2.toString();
1186+
return Utf8.compareSequences(cs1, cs2);
11871187
default:
11881188
return ((Comparable) o1).compareTo(o2);
11891189
}

lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,4 +237,28 @@ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundExcept
237237
setByteLength(in.readInt());
238238
in.readFully(bytes);
239239
}
240+
241+
public static int compareSequences(CharSequence cs1, CharSequence cs2) {
242+
if (cs1 == cs2) {
243+
return 0;
244+
}
245+
246+
if (cs1 == null || cs2 == null) {
247+
return cs1 == null ? 1 : -1;
248+
}
249+
250+
if (cs1.getClass() == cs2.getClass() && cs1 instanceof Comparable) {
251+
return ((Comparable<Object>) cs1).compareTo(cs2);
252+
}
253+
254+
for (int i = 0, len = Math.min(cs1.length(), cs2.length()); i < len; i++) {
255+
char a = cs1.charAt(i);
256+
char b = cs2.charAt(i);
257+
if (a != b) {
258+
return a - b;
259+
}
260+
}
261+
262+
return cs1.length() - cs2.length();
263+
}
240264
}

lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,15 @@ public void testHashCodeReused() {
9898
assertEquals(3198781, u.hashCode());
9999
}
100100

101+
@Test
102+
public void testHashCodeSameAsString() {
103+
assertEquals("a".hashCode(), new Utf8("a").hashCode());
104+
assertEquals("zz".hashCode(), new Utf8("zz").hashCode());
105+
assertEquals("z".hashCode(), new Utf8("z").hashCode());
106+
assertEquals("hello".hashCode(), new Utf8("hello").hashCode());
107+
assertEquals("hell".hashCode(), new Utf8("hell").hashCode());
108+
}
109+
101110
@Test
102111
public void testSerialization() throws IOException, ClassNotFoundException {
103112
try (ByteArrayOutputStream bos = new ByteArrayOutputStream();

lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,6 +1131,46 @@ public static String mangle(String word, Set<String> reservedWords, boolean isMe
11311131
return word;
11321132
}
11331133

1134+
public boolean canGenerateEqualsAndHashCode(Schema schema) {
1135+
return getUsedCustomLogicalTypeFactories(schema).isEmpty();
1136+
}
1137+
1138+
public boolean isPrimitiveType(Schema schema) {
1139+
return !isUnboxedJavaTypeNullable(schema) && getConvertedLogicalType(schema) == null;
1140+
}
1141+
1142+
public String hashCodeFor(Schema schema, String name) {
1143+
switch (javaUnbox(schema)) {
1144+
case "int":
1145+
return "Integer.hashCode(" + name + ")";
1146+
case "long":
1147+
return "Long.hashCode(" + name + ")";
1148+
case "float":
1149+
return "Float.hashCode(" + name + ")";
1150+
case "double":
1151+
return "Double.hashCode(" + name + ")";
1152+
case "boolean":
1153+
return "Boolean.hashCode(" + name + ")";
1154+
default:
1155+
// Hashcode of Union is expected to match ordinal
1156+
if (schema.getType() == Schema.Type.ENUM || ((schema.getType() == Schema.Type.UNION)
1157+
&& (schema.getTypes().stream().anyMatch(t -> t.getType() == Schema.Type.ENUM)))) {
1158+
if (schema.getType() == Schema.Type.ENUM
1159+
|| (schema.getTypes().size() == 2 && schema.getTypes().contains(NULL_SCHEMA))) {
1160+
return "(" + name + " == null ? 0 : ((java.lang.Enum) " + name + ").ordinal())";
1161+
} else {
1162+
return "(" + name + " == null ? 0 : " + name + " instanceof java.lang.Enum ? ((java.lang.Enum) " + name
1163+
+ ").ordinal() : " + name + ".hashCode())";
1164+
}
1165+
}
1166+
return "(" + name + " == null ? 0 : " + name + ".hashCode())";
1167+
}
1168+
}
1169+
1170+
public boolean ignoredField(Field field) {
1171+
return field.order() == Field.Order.IGNORE;
1172+
}
1173+
11341174
/**
11351175
* Utility for use by templates. Return schema fingerprint as a long.
11361176
*/

lang/java/compiler/src/main/velocity/org/apache/avro/compiler/specific/templates/java/classic/record.vm

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,47 @@ public class ${this.mangleTypeIdentifier($schema.getName())}#if ($schema.isError
604604
}
605605
}
606606
#end
607+
#if ($this.canGenerateEqualsAndHashCode($schema))

## hashCode(): folds the hashCodeFor(...) expression of every field into a
## 31-based running hash. Fields for which ignoredField(...) is true are skipped.
  @Override
  public int hashCode() {
    int result = 1;
#foreach ($field in $schema.getFields())
#if (!${this.ignoredField($field)})
#set ($n = ${this.mangle($field.name(), $schema.isError())})
    result = 31 * result + ${this.hashCodeFor($field.schema(), $n)};
#end
#end
    return result;
  }

## equals(): field-by-field comparison over the same non-ignored fields.
## float and double use Float.compare / Double.compare rather than != so that
## equals agrees with Float.hashCode / Double.hashCode used by hashCode():
## with !=, 0.0 and -0.0 compare equal but hash differently, and NaN never
## equals NaN. Other primitives use !=, CharSequence fields use
## Utf8.compareSequences, and everything else uses java.util.Objects.equals.
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof ${this.mangleTypeIdentifier($schema.getName())})) {
      return false;
    }
    ${this.mangleTypeIdentifier($schema.getName())} other = (${this.mangleTypeIdentifier($schema.getName())}) o;
#foreach ($field in $schema.getFields())
#if (!${this.ignoredField($field)})
#set ($n = ${this.mangle($field.name(), $schema.isError())})
#set ($s = $field.schema())
#if (${this.isPrimitiveType($s)})
#if (${this.javaUnbox($s).equals("float")})
    if (Float.compare(this.$n, other.$n) != 0) {
#elseif (${this.javaUnbox($s).equals("double")})
    if (Double.compare(this.$n, other.$n) != 0) {
#else
    if (this.$n != other.$n) {
#end
#elseif (${this.javaType($field.schema()).equals("java.lang.CharSequence")})
    if (Utf8.compareSequences(this.$n, other.$n) != 0) {
#else
    if (!java.util.Objects.equals(this.$n, other.$n)) {
#end
      return false;
    }
#end
#end
    return true;
  }
#end
607648
}
608649

609650
#macro( encodeVar $indent $var $s )

lang/java/tools/src/test/compiler/output-string/avro/examples/baseball/FieldTest.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
@org.apache.avro.specific.AvroGenerated
1717
public class FieldTest extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
1818
private static final long serialVersionUID = 4609235620572341636L;
19+
20+
1921
public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"FieldTest\",\"namespace\":\"avro.examples.baseball\",\"doc\":\"Test various field types\",\"fields\":[{\"name\":\"number\",\"type\":\"int\",\"doc\":\"The number of the player\"},{\"name\":\"last_name\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}},{\"name\":\"timestamp\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}},{\"name\":\"timestampMicros\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"}},{\"name\":\"timeMillis\",\"type\":{\"type\":\"int\",\"logicalType\":\"time-millis\"}},{\"name\":\"timeMicros\",\"type\":{\"type\":\"long\",\"logicalType\":\"time-micros\"}}]}");
2022
public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
2123

@@ -654,6 +656,48 @@ public FieldTest build() {
654656
READER$.read(this, SpecificData.getDecoder(in));
655657
}
656658

659+
660+
@Override
661+
public int hashCode() {
662+
int result = 1;
663+
result = 31 * result + Integer.hashCode(number);
664+
result = 31 * result + (last_name == null ? 0 : last_name.hashCode());
665+
result = 31 * result + (timestamp == null ? 0 : timestamp.hashCode());
666+
result = 31 * result + (timestampMicros == null ? 0 : timestampMicros.hashCode());
667+
result = 31 * result + (timeMillis == null ? 0 : timeMillis.hashCode());
668+
result = 31 * result + (timeMicros == null ? 0 : timeMicros.hashCode());
669+
return result;
670+
}
671+
672+
@Override
673+
public boolean equals(Object o) {
674+
if (this == o) {
675+
return true;
676+
}
677+
if (!(o instanceof FieldTest)) {
678+
return false;
679+
}
680+
FieldTest other = (FieldTest) o;
681+
if (this.number != other.number) {
682+
return false;
683+
}
684+
if (!java.util.Objects.equals(this.last_name, other.last_name)) {
685+
return false;
686+
}
687+
if (!java.util.Objects.equals(this.timestamp, other.timestamp)) {
688+
return false;
689+
}
690+
if (!java.util.Objects.equals(this.timestampMicros, other.timestampMicros)) {
691+
return false;
692+
}
693+
if (!java.util.Objects.equals(this.timeMillis, other.timeMillis)) {
694+
return false;
695+
}
696+
if (!java.util.Objects.equals(this.timeMicros, other.timeMicros)) {
697+
return false;
698+
}
699+
return true;
700+
}
657701
}
658702

659703

lang/java/tools/src/test/compiler/output-string/avro/examples/baseball/Player.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
@org.apache.avro.specific.AvroGenerated
1717
public class Player extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
1818
private static final long serialVersionUID = 3865593031278745715L;
19+
20+
1921
public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Player\",\"namespace\":\"avro.examples.baseball\",\"doc\":\"選手 is Japanese for player.\",\"fields\":[{\"name\":\"number\",\"type\":\"int\",\"doc\":\"The number of the player\"},{\"name\":\"first_name\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}},{\"name\":\"last_name\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}},{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"enum\",\"name\":\"Position\",\"symbols\":[\"P\",\"C\",\"B1\",\"B2\",\"B3\",\"SS\",\"LF\",\"CF\",\"RF\",\"DH\"]}}}]}");
2022
public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
2123

@@ -582,6 +584,40 @@ public Player build() {
582584
}
583585
}
584586
}
587+
588+
@Override
589+
public int hashCode() {
590+
int result = 1;
591+
result = 31 * result + Integer.hashCode(number);
592+
result = 31 * result + (first_name == null ? 0 : first_name.hashCode());
593+
result = 31 * result + (last_name == null ? 0 : last_name.hashCode());
594+
result = 31 * result + (position == null ? 0 : position.hashCode());
595+
return result;
596+
}
597+
598+
@Override
599+
public boolean equals(Object o) {
600+
if (this == o) {
601+
return true;
602+
}
603+
if (!(o instanceof Player)) {
604+
return false;
605+
}
606+
Player other = (Player) o;
607+
if (this.number != other.number) {
608+
return false;
609+
}
610+
if (!java.util.Objects.equals(this.first_name, other.first_name)) {
611+
return false;
612+
}
613+
if (!java.util.Objects.equals(this.last_name, other.last_name)) {
614+
return false;
615+
}
616+
if (!java.util.Objects.equals(this.position, other.position)) {
617+
return false;
618+
}
619+
return true;
620+
}
585621
}
586622

587623

lang/java/tools/src/test/compiler/output/AddExtraOptionalGettersTest.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
@org.apache.avro.specific.AvroGenerated
1717
public class AddExtraOptionalGettersTest extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
1818
private static final long serialVersionUID = -3300987256178011215L;
19+
20+
1921
public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"AddExtraOptionalGettersTest\",\"namespace\":\"avro.examples.baseball\",\"doc\":\"Test that extra optional getters are added\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"favorite_number\",\"type\":[\"int\",\"null\"]}]}");
2022
public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
2123

@@ -423,6 +425,32 @@ public AddExtraOptionalGettersTest build() {
423425
}
424426
}
425427
}
428+
429+
@Override
430+
public int hashCode() {
431+
int result = 1;
432+
result = 31 * result + (name == null ? 0 : name.hashCode());
433+
result = 31 * result + (favorite_number == null ? 0 : favorite_number.hashCode());
434+
return result;
435+
}
436+
437+
@Override
438+
public boolean equals(Object o) {
439+
if (this == o) {
440+
return true;
441+
}
442+
if (!(o instanceof AddExtraOptionalGettersTest)) {
443+
return false;
444+
}
445+
AddExtraOptionalGettersTest other = (AddExtraOptionalGettersTest) o;
446+
if (Utf8.compareSequences(this.name, other.name) != 0) {
447+
return false;
448+
}
449+
if (!java.util.Objects.equals(this.favorite_number, other.favorite_number)) {
450+
return false;
451+
}
452+
return true;
453+
}
426454
}
427455

428456

lang/java/tools/src/test/compiler/output/NoSettersTest.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
@org.apache.avro.specific.AvroGenerated
1717
public class NoSettersTest extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
1818
private static final long serialVersionUID = 8604146783520861700L;
19+
20+
1921
public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"NoSettersTest\",\"namespace\":\"avro.examples.baseball\",\"doc\":\"Test that setters are omitted\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"favorite_number\",\"type\":[\"int\",\"null\"]}]}");
2022
public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
2123

@@ -381,6 +383,32 @@ public NoSettersTest build() {
381383
}
382384
}
383385
}
386+
387+
@Override
388+
public int hashCode() {
389+
int result = 1;
390+
result = 31 * result + (name == null ? 0 : name.hashCode());
391+
result = 31 * result + (favorite_number == null ? 0 : favorite_number.hashCode());
392+
return result;
393+
}
394+
395+
@Override
396+
public boolean equals(Object o) {
397+
if (this == o) {
398+
return true;
399+
}
400+
if (!(o instanceof NoSettersTest)) {
401+
return false;
402+
}
403+
NoSettersTest other = (NoSettersTest) o;
404+
if (Utf8.compareSequences(this.name, other.name) != 0) {
405+
return false;
406+
}
407+
if (!java.util.Objects.equals(this.favorite_number, other.favorite_number)) {
408+
return false;
409+
}
410+
return true;
411+
}
384412
}
385413

386414

lang/java/tools/src/test/compiler/output/OptionalGettersAllFieldsTest.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
@org.apache.avro.specific.AvroGenerated
1717
public class OptionalGettersAllFieldsTest extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
1818
private static final long serialVersionUID = 874861432798554536L;
19+
20+
1921
public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"OptionalGettersAllFieldsTest\",\"namespace\":\"avro.examples.baseball\",\"doc\":\"Test that optional getters are created for all fields\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"nullable_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"favorite_number\",\"type\":[\"int\"]},{\"name\":\"nullable_favorite_number\",\"type\":[\"int\",\"null\"]}]}");
2022
public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
2123

@@ -486,6 +488,40 @@ public OptionalGettersAllFieldsTest build() {
486488
READER$.read(this, SpecificData.getDecoder(in));
487489
}
488490

491+
492+
@Override
493+
public int hashCode() {
494+
int result = 1;
495+
result = 31 * result + (name == null ? 0 : name.hashCode());
496+
result = 31 * result + (nullable_name == null ? 0 : nullable_name.hashCode());
497+
result = 31 * result + (favorite_number == null ? 0 : favorite_number.hashCode());
498+
result = 31 * result + (nullable_favorite_number == null ? 0 : nullable_favorite_number.hashCode());
499+
return result;
500+
}
501+
502+
@Override
503+
public boolean equals(Object o) {
504+
if (this == o) {
505+
return true;
506+
}
507+
if (!(o instanceof OptionalGettersAllFieldsTest)) {
508+
return false;
509+
}
510+
OptionalGettersAllFieldsTest other = (OptionalGettersAllFieldsTest) o;
511+
if (Utf8.compareSequences(this.name, other.name) != 0) {
512+
return false;
513+
}
514+
if (Utf8.compareSequences(this.nullable_name, other.nullable_name) != 0) {
515+
return false;
516+
}
517+
if (!java.util.Objects.equals(this.favorite_number, other.favorite_number)) {
518+
return false;
519+
}
520+
if (!java.util.Objects.equals(this.nullable_favorite_number, other.nullable_favorite_number)) {
521+
return false;
522+
}
523+
return true;
524+
}
489525
}
490526

491527

0 commit comments

Comments
 (0)