From e2b764c8eaad81834824d73dedafbc8ba2c196e8 Mon Sep 17 00:00:00 2001 From: mattiabasone Date: Fri, 30 Jan 2026 17:17:37 +0100 Subject: [PATCH 1/4] Add AvroTranspiler and test (wip) --- composer.json | 3 +- lang/php/lib/Generator/AvroTranspiler.php | 244 ++++++++++++++++++ .../lib/Generator/AvroTranspilerException.php | 27 ++ lang/php/lib/Schema/AvroEnumSchema.php | 5 +- lang/php/lib/Schema/AvroName.php | 5 + lang/php/lib/Schema/AvroNamedSchema.php | 5 + .../php/test/Generator/AvroTranspilerTest.php | 133 ++++++++++ 7 files changed, 419 insertions(+), 3 deletions(-) create mode 100644 lang/php/lib/Generator/AvroTranspiler.php create mode 100644 lang/php/lib/Generator/AvroTranspilerException.php create mode 100644 lang/php/test/Generator/AvroTranspilerTest.php diff --git a/composer.json b/composer.json index ec5d2a3f37b..7069c0edff6 100644 --- a/composer.json +++ b/composer.json @@ -23,7 +23,8 @@ "issues": "https://issues.apache.org/jira/browse/AVRO" }, "require": { - "php": "^8.1" + "php": "^8.1", + "nikic/php-parser": "^5.7" }, "deps": [ "vendor/phpunit/phpunit" diff --git a/lang/php/lib/Generator/AvroTranspiler.php b/lang/php/lib/Generator/AvroTranspiler.php new file mode 100644 index 00000000000..56219be7d0e --- /dev/null +++ b/lang/php/lib/Generator/AvroTranspiler.php @@ -0,0 +1,244 @@ + */ + private array $registry = []; + + public function __construct() + { + $this->factory = new BuilderFactory(); + $this->printer = new Standard(['shortArraySyntax' => true]); + } + + /** + * @return array Map of filename to file contents + */ + public function translate( + AvroSchema $schema, + string $path, + string $phpNamespace + ): array { + $this->registry = []; + $this->buildRegistry($schema); + + $files = []; + + foreach ($this->registry as $name => $registeredSchema) { + $node = match (true) { + $registeredSchema instanceof AvroEnumSchema => $this->buildEnum( + $registeredSchema, + $phpNamespace, + $registeredSchema->symbols() + ), + $registeredSchema instanceof AvroRecordSchema => $this->buildRecord( + $registeredSchema, + $phpNamespace + ), + default => null + }; + + if (null !== $node) { + $code = <<printer->prettyPrint([$node])} + + PHP; + + $filename = $path.'/'.ucwords($name).'.php'; + $files[$filename] = $code; + } + } + + return $files; + } + + public function buildRegistry(AvroSchema $rootSchema): void + { + $this->collectSchemas($rootSchema); + } + + private function collectSchemas(AvroSchema $schema): void + { + if ($schema instanceof AvroRecordSchema) { + if (!array_key_exists($schema->fullname(), $this->registry)) { + $this->registry[$schema->fullname()] = $schema; + foreach ($schema->fields() as $field) { + $this->collectSchemas($field->type()); + } + } + } elseif ($schema instanceof AvroEnumSchema) { + $this->registry[$schema->fullname()] = $schema; + } elseif ($schema instanceof AvroArraySchema) { + $this->collectSchemas($schema->items()); + } elseif ($schema instanceof AvroMapSchema) { + $this->collectSchemas($schema->values()); + } elseif ($schema instanceof AvroUnionSchema) { + foreach ($schema->schemas() as $unionSchema) { + $this->collectSchemas($unionSchema); + } + } + } + + private function buildRecord( + AvroRecordSchema $avroRecord, + string $phpNamespace + ): Node { + $className = ucwords($avroRecord->name()); + $class = $this->factory->class($className)->makeFinal(); + + foreach ($avroRecord->fields() as $field) { + $phpType = $this->avroTypeToPhp($field->type(), $phpNamespace); + $property = $this->factory->property($field->name()) + ->makePrivate() + ->setType($phpType); + + if ($field->hasDefaultValue()) { + $property->setDefault($this->buildDefault($field->defaultValue())); + } + + $class->addStmt($property); + } + + // Add constructor + $constructor = $this->factory->method('__construct')->makePublic(); + foreach ($avroRecord->fields() as $field) { + $phpType = $this->avroTypeToPhp($field->type(), $phpNamespace); + $param = $this->factory->param($field->name())->setType($phpType); + if ($field->hasDefaultValue()) { + $param->setDefault($this->buildDefault($field->defaultValue())); + } + $constructor->addParam($param); + $constructor->addStmt( + new Node\Expr\Assign( + new Node\Expr\PropertyFetch(new Node\Expr\Variable('this'), $field->name()), + new Node\Expr\Variable($field->name()) + ) + ); + } + $class->addStmt($constructor); + + // Add getters + foreach ($avroRecord->fields() as $field) { + $phpType = $this->avroTypeToPhp($field->type(), $phpNamespace); + $getter = $this->factory->method($field->name()) + ->makePublic() + ->setReturnType($phpType) + ->addStmt( + new Stmt\Return_( + new Node\Expr\PropertyFetch(new Node\Expr\Variable('this'), $field->name()) + ) + ); + $class->addStmt($getter); + } + + return $this->factory->namespace($phpNamespace) + ->addStmt($class) + ->getNode(); + } + + /** + * @param array $values + */ + private function buildEnum( + AvroEnumSchema $avroEnum, + string $phpNamespace, + array $values + ): Node { + $className = ucwords($avroEnum->name()); + $enum = $this->factory->enum($className)->setScalarType('string'); + + foreach ($values as $value) { + $caseName = strtoupper($value); + $enum->addStmt( + $this->factory->enumCase($caseName)->setValue($value) + ); + } + + return $this->factory->namespace($phpNamespace) + ->addStmt($enum) + ->getNode(); + } + + private function avroTypeToPhp(AvroSchema $schema, string $phpNamespace): string + { + return match (true) { + $schema instanceof AvroPrimitiveSchema => $this->avroPrimitiveTypeToPhp($schema), + $schema instanceof AvroArraySchema, $schema instanceof AvroMapSchema => 'array', + $schema instanceof AvroRecordSchema, $schema instanceof AvroEnumSchema => '\\'.$phpNamespace.'\\'.ucwords($schema->name()), + $schema instanceof AvroUnionSchema => $this->unionToPhp($schema, $phpNamespace), + default => 'mixed' + }; + } + + private function avroPrimitiveTypeToPhp(AvroPrimitiveSchema $primitiveSchema): string + { + return match ($primitiveSchema->type()) { + AvroSchema::NULL_TYPE => 'null', + AvroSchema::BOOLEAN_TYPE => 'bool', + AvroSchema::INT_TYPE, AvroSchema::LONG_TYPE => 'int', + AvroSchema::FLOAT_TYPE, AvroSchema::DOUBLE_TYPE => 'float', + AvroSchema::STRING_TYPE, AvroSchema::BYTES_TYPE => 'string', + default => throw new AvroTranspilerException("Unknown primitive type: ".$primitiveSchema->type()), + }; + } + + private function unionToPhp(AvroUnionSchema $union, string $phpNamespace): string + { + $types = []; + foreach ($union->schemas() as $schema) { + $types[] = $this->avroTypeToPhp($schema, $phpNamespace); + } + + return implode('|', array_unique($types)); + } + + private function buildDefault(mixed $value): mixed + { + if (is_array($value)) { + return $this->factory->val($value); + } + + return $value; + } +} diff --git a/lang/php/lib/Generator/AvroTranspilerException.php b/lang/php/lib/Generator/AvroTranspilerException.php new file mode 100644 index 00000000000..ad80640a9db --- /dev/null +++ b/lang/php/lib/Generator/AvroTranspilerException.php @@ -0,0 +1,27 @@ +symbols; } @@ -72,13 +72,14 @@ public function symbols() * @return bool true if the given symbol exists in this * enum schema and false otherwise */ - public function hasSymbol($symbol) + public function hasSymbol($symbol): bool { return in_array($symbol, $this->symbols); } /** * @param int $index + * @throws AvroException * @return string enum schema symbol with the given (zero-based) index */ public function symbolByIndex($index) diff --git a/lang/php/lib/Schema/AvroName.php b/lang/php/lib/Schema/AvroName.php index 8b8f48b6a8d..a83ff88499d 100644 --- a/lang/php/lib/Schema/AvroName.php +++ b/lang/php/lib/Schema/AvroName.php @@ -109,6 +109,11 @@ public function nameAndNamespace(): array return [$this->name, $this->namespace]; } + public function name(): string + { + return $this->name; + } + public function fullname(): string { return $this->fullname; diff --git a/lang/php/lib/Schema/AvroNamedSchema.php b/lang/php/lib/Schema/AvroNamedSchema.php index b34e90f30ca..29fda82b4cd 100644 --- a/lang/php/lib/Schema/AvroNamedSchema.php +++ b/lang/php/lib/Schema/AvroNamedSchema.php @@ -77,6 +77,11 @@ public function toAvro(): string|array return $avro; } + public function name(): string + { + return $this->name->name(); + } + public function qualifiedName(): string { return $this->name->qualifiedName(); diff --git a/lang/php/test/Generator/AvroTranspilerTest.php b/lang/php/test/Generator/AvroTranspilerTest.php new file mode 100644 index 00000000000..3abf1c1973a --- /dev/null +++ b/lang/php/test/Generator/AvroTranspilerTest.php @@ -0,0 +1,133 @@ +transpiler = new AvroTranspiler(); + } + + #[Test] + public function nested_schema_generation(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'MyApp\\Avro\\Generated'); + + self::assertCount(2, $files); + + self::assertArrayHasKey('/generated/Lisp.php', $files); + self::assertArrayHasKey('/generated/Cons.php', $files); + + $expectedLisp = <<value = \$value; + } + public function value(): null|string|\MyApp\Avro\Generated\Cons + { + return \$this->value; + } + } + + PHP; + + self::assertEquals($expectedLisp, $files['/generated/Lisp.php']); + + $expectedLisp = <<car = \$car; + \$this->cdr = \$cdr; + } + public function car(): \MyApp\Avro\Generated\Lisp + { + return \$this->car; + } + public function cdr(): \MyApp\Avro\Generated\Lisp + { + return \$this->cdr; + } + } + + PHP; + self::assertEquals($expectedLisp, $files['/generated/Cons.php']); + } +} From 84e72648e9c495b830c497beb00af2970957b155 Mon Sep 17 00:00:00 2001 From: mattiabasone Date: Fri, 27 Feb 2026 16:51:31 +0100 Subject: [PATCH 2/4] add new functionalities to AvroTranspiler --- lang/php/lib/Generator/AvroTranspiler.php | 123 +- .../php/test/Generator/AvroTranspilerTest.php | 1440 +++++++++++++++++ 2 files changed, 1543 insertions(+), 20 deletions(-) diff --git a/lang/php/lib/Generator/AvroTranspiler.php b/lang/php/lib/Generator/AvroTranspiler.php index 56219be7d0e..2bec01b4c52 100644 --- a/lang/php/lib/Generator/AvroTranspiler.php +++ b/lang/php/lib/Generator/AvroTranspiler.php @@ -56,7 +56,6 @@ public function translate( string $path, string $phpNamespace ): array { - $this->registry = []; $this->buildRegistry($schema); $files = []; @@ -93,30 +92,42 @@ public function translate( return $files; } - public function buildRegistry(AvroSchema $rootSchema): void + private function buildRegistry(AvroSchema $rootSchema): void { + $this->registry = []; $this->collectSchemas($rootSchema); } private function collectSchemas(AvroSchema $schema): void { - if ($schema instanceof AvroRecordSchema) { - if (!array_key_exists($schema->fullname(), $this->registry)) { + switch ($schema::class) { + case AvroRecordSchema::class: + if (!array_key_exists($schema->fullname(), $this->registry)) { + $this->registry[$schema->fullname()] = $schema; + foreach ($schema->fields() as $field) { + $this->collectSchemas($field->type()); + } + } + + break; + case AvroEnumSchema::class: $this->registry[$schema->fullname()] = $schema; - foreach ($schema->fields() as $field) { - $this->collectSchemas($field->type()); + + break; + case AvroArraySchema::class: + $this->collectSchemas($schema->items()); + + break; + case AvroMapSchema::class: + $this->collectSchemas($schema->values()); + + break; + case AvroUnionSchema::class: + foreach ($schema->schemas() as $unionSchema) { + $this->collectSchemas($unionSchema); } - } - } elseif ($schema instanceof AvroEnumSchema) { - $this->registry[$schema->fullname()] = $schema; - } elseif ($schema instanceof AvroArraySchema) { - $this->collectSchemas($schema->items()); - } elseif ($schema instanceof AvroMapSchema) { - $this->collectSchemas($schema->values()); - } elseif ($schema instanceof AvroUnionSchema) { - foreach ($schema->schemas() as $unionSchema) { - $this->collectSchemas($unionSchema); - } + + break; } } @@ -133,6 +144,11 @@ private function buildRecord( ->makePrivate() ->setType($phpType); + $phpDocType = $this->avroTypeToPhpDoc($field->type(), $phpNamespace); + if (null !== $phpDocType) { + $property->setDocComment('/** @var '.$phpDocType.' */'); + } + if ($field->hasDefaultValue()) { $property->setDefault($this->buildDefault($field->defaultValue())); } @@ -140,14 +156,20 @@ private function buildRecord( $class->addStmt($property); } - // Add constructor $constructor = $this->factory->method('__construct')->makePublic(); + $constructorParamDocs = []; foreach ($avroRecord->fields() as $field) { $phpType = $this->avroTypeToPhp($field->type(), $phpNamespace); $param = $this->factory->param($field->name())->setType($phpType); if ($field->hasDefaultValue()) { $param->setDefault($this->buildDefault($field->defaultValue())); } + + $phpDocType = $this->avroTypeToPhpDoc($field->type(), $phpNamespace); + if (null !== $phpDocType) { + $constructorParamDocs[] = '@param '.$phpDocType.' $'.$field->name(); + } + $constructor->addParam($param); $constructor->addStmt( new Node\Expr\Assign( @@ -156,9 +178,16 @@ private function buildRecord( ) ); } + if ([] !== $constructorParamDocs) { + $docLines = "/**\n"; + foreach ($constructorParamDocs as $doc) { + $docLines .= ' * '.$doc."\n"; + } + $docLines .= ' */'; + $constructor->setDocComment($docLines); + } $class->addStmt($constructor); - // Add getters foreach ($avroRecord->fields() as $field) { $phpType = $this->avroTypeToPhp($field->type(), $phpNamespace); $getter = $this->factory->method($field->name()) @@ -169,6 +198,12 @@ private function buildRecord( new Node\Expr\PropertyFetch(new Node\Expr\Variable('this'), $field->name()) ) ); + + $phpDocType = $this->avroTypeToPhpDoc($field->type(), $phpNamespace); + if (null !== $phpDocType) { + $getter->setDocComment('/** @return '.$phpDocType.' */'); + } + $class->addStmt($getter); } @@ -178,7 +213,7 @@ private function buildRecord( } /** - * @param array $values + * @param list $values */ private function buildEnum( AvroEnumSchema $avroEnum, @@ -241,4 +276,52 @@ private function buildDefault(mixed $value): mixed return $value; } + + /** + * Returns a PHPDoc type string for schemas that need richer type info than + * what PHP's native type system can express (arrays and maps), or null when + * the native type hint is sufficient. + */ + private function avroTypeToPhpDoc(AvroSchema $schema, string $phpNamespace): ?string + { + return match (true) { + $schema instanceof AvroArraySchema => 'list<'.$this->avroTypeToPhpDocInner($schema->items(), $phpNamespace).'>', + $schema instanceof AvroMapSchema => 'arrayavroTypeToPhpDocInner($schema->values(), $phpNamespace).'>', + $schema instanceof AvroUnionSchema => $this->unionToPhpDoc($schema, $phpNamespace), + default => null, + }; + } + + private function avroTypeToPhpDocInner(AvroSchema $schema, string $phpNamespace): string + { + return match (true) { + $schema instanceof AvroPrimitiveSchema => $this->avroPrimitiveTypeToPhp($schema), + $schema instanceof AvroArraySchema => 'list<'.$this->avroTypeToPhpDocInner($schema->items(), $phpNamespace).'>', + $schema instanceof AvroMapSchema => 'arrayavroTypeToPhpDocInner($schema->values(), $phpNamespace).'>', + $schema instanceof AvroRecordSchema, $schema instanceof AvroEnumSchema => '\\'.$phpNamespace.'\\'.ucwords($schema->name()), + $schema instanceof AvroUnionSchema => $this->unionToPhp($schema, $phpNamespace), + default => 'mixed', + }; + } + + private function unionToPhpDoc(AvroUnionSchema $union, string $phpNamespace): ?string + { + $hasArrayOrMap = false; + $docParts = []; + + foreach ($union->schemas() as $schema) { + if ($schema instanceof AvroArraySchema || $schema instanceof AvroMapSchema) { + $hasArrayOrMap = true; + $docParts[] = $this->avroTypeToPhpDocInner($schema, $phpNamespace); + } else { + $docParts[] = $this->avroTypeToPhp($schema, $phpNamespace); + } + } + + if (!$hasArrayOrMap) { + return null; + } + + return implode('|', array_unique($docParts)); + } } diff --git a/lang/php/test/Generator/AvroTranspilerTest.php b/lang/php/test/Generator/AvroTranspilerTest.php index 3abf1c1973a..df0fdc05cbe 100644 --- a/lang/php/test/Generator/AvroTranspilerTest.php +++ b/lang/php/test/Generator/AvroTranspilerTest.php @@ -130,4 +130,1444 @@ public function cdr(): \MyApp\Avro\Generated\Lisp PHP; self::assertEquals($expectedLisp, $files['/generated/Cons.php']); } + + #[Test] + public function simple_record_with_primitive_types(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Model'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/User.php', $files); + + $expected = <<name = \$name; + \$this->age = \$age; + \$this->active = \$active; + \$this->score = \$score; + } + public function name(): string + { + return \$this->name; + } + public function age(): int + { + return \$this->age; + } + public function active(): bool + { + return \$this->active; + } + public function score(): float + { + return \$this->score; + } + } + + PHP; + + self::assertEquals($expected, $files['/generated/User.php']); + } + + #[Test] + public function enum_schema_generation(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Enums'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/Color.php', $files); + + $expected = <<transpiler->translate($avroSchema, '/generated', 'App\\Config'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/Config.php', $files); + + $expected = <<retries = \$retries; + \$this->label = \$label; + \$this->enabled = \$enabled; + } + public function retries(): int + { + return \$this->retries; + } + public function label(): string + { + return \$this->label; + } + public function enabled(): bool + { + return \$this->enabled; + } + } + + PHP; + + self::assertEquals($expected, $files['/generated/Config.php']); + } + + #[Test] + public function record_with_array_field(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Music'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/Playlist.php', $files); + + $expected = << */ + private array \$tags; + /** + * @param list \$tags + */ + public function __construct(string \$name, array \$tags) + { + \$this->name = \$name; + \$this->tags = \$tags; + } + public function name(): string + { + return \$this->name; + } + /** @return list */ + public function tags(): array + { + return \$this->tags; + } + } + + PHP; + + self::assertEquals($expected, $files['/generated/Playlist.php']); + } + + #[Test] + public function record_with_map_field(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Data'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/Metadata.php', $files); + + $expected = << */ + private array \$properties; + /** + * @param array \$properties + */ + public function __construct(array \$properties) + { + \$this->properties = \$properties; + } + /** @return array */ + public function properties(): array + { + return \$this->properties; + } + } + + PHP; + + self::assertEquals($expected, $files['/generated/Metadata.php']); + } + + #[Test] + public function record_with_enum_field(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Vehicles'); + + self::assertCount(2, $files); + self::assertArrayHasKey('/generated/Car.php', $files); + self::assertArrayHasKey('/generated/FuelType.php', $files); + + $expectedCar = <<brand = \$brand; + \$this->fuel = \$fuel; + } + public function brand(): string + { + return \$this->brand; + } + public function fuel(): \App\Vehicles\FuelType + { + return \$this->fuel; + } + } + + PHP; + + self::assertEquals($expectedCar, $files['/generated/Car.php']); + + $expectedEnum = <<transpiler->translate($avroSchema, '/generated', 'App\\Social'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/Profile.php', $files); + + $expected = <<username = \$username; + \$this->bio = \$bio; + } + public function username(): string + { + return \$this->username; + } + public function bio(): null|string + { + return \$this->bio; + } + } + + PHP; + + self::assertEquals($expected, $files['/generated/Profile.php']); + } + + #[Test] + public function record_with_all_primitive_types(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Types'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/AllTypes.php', $files); + + $expected = <<nullField = \$nullField; + \$this->boolField = \$boolField; + \$this->intField = \$intField; + \$this->longField = \$longField; + \$this->floatField = \$floatField; + \$this->doubleField = \$doubleField; + \$this->stringField = \$stringField; + \$this->bytesField = \$bytesField; + } + public function nullField(): null + { + return \$this->nullField; + } + public function boolField(): bool + { + return \$this->boolField; + } + public function intField(): int + { + return \$this->intField; + } + public function longField(): int + { + return \$this->longField; + } + public function floatField(): float + { + return \$this->floatField; + } + public function doubleField(): float + { + return \$this->doubleField; + } + public function stringField(): string + { + return \$this->stringField; + } + public function bytesField(): string + { + return \$this->bytesField; + } + } + + PHP; + + self::assertEquals($expected, $files['/generated/AllTypes.php']); + } + + #[Test] + public function record_with_nested_array_of_records(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Org'); + + self::assertCount(2, $files); + self::assertArrayHasKey('/generated/Team.php', $files); + self::assertArrayHasKey('/generated/Member.php', $files); + + $expectedTeam = << */ + private array \$members; + /** + * @param list<\App\Org\Member> \$members + */ + public function __construct(string \$name, array \$members) + { + \$this->name = \$name; + \$this->members = \$members; + } + public function name(): string + { + return \$this->name; + } + /** @return list<\App\Org\Member> */ + public function members(): array + { + return \$this->members; + } + } + + PHP; + + self::assertEquals($expectedTeam, $files['/generated/Team.php']); + + $expectedMember = <<name = \$name; + \$this->role = \$role; + } + public function name(): string + { + return \$this->name; + } + public function role(): string + { + return \$this->role; + } + } + + PHP; + + self::assertEquals($expectedMember, $files['/generated/Member.php']); + } + + #[Test] + public function record_with_multiple_union_types(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Events'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/Event.php', $files); + + $expected = <<payload = \$payload; + } + public function payload(): null|string|int|bool + { + return \$this->payload; + } + } + + PHP; + + self::assertEquals($expected, $files['/generated/Event.php']); + } + + #[Test] + public function record_with_nested_record_field(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Shop'); + + self::assertCount(2, $files); + self::assertArrayHasKey('/generated/Order.php', $files); + self::assertArrayHasKey('/generated/Address.php', $files); + + $expectedOrder = <<id = \$id; + \$this->address = \$address; + } + public function id(): int + { + return \$this->id; + } + public function address(): \App\Shop\Address + { + return \$this->address; + } + } + + PHP; + + self::assertEquals($expectedOrder, $files['/generated/Order.php']); + + $expectedAddress = <<street = \$street; + \$this->city = \$city; + } + public function street(): string + { + return \$this->street; + } + public function city(): string + { + return \$this->city; + } + } + + PHP; + + self::assertEquals($expectedAddress, $files['/generated/Address.php']); + } + + #[Test] + public function enum_with_single_symbol(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Enums'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/Singleton.php', $files); + + $expected = <<transpiler->translate($avroSchema, '/generated', 'App\\HR'); + + self::assertCount(2, $files); + self::assertArrayHasKey('/generated/Employee.php', $files); + self::assertArrayHasKey('/generated/Manager.php', $files); + + $expectedEmployee = <<name = \$name; + \$this->manager = \$manager; + } + public function name(): string + { + return \$this->name; + } + public function manager(): null|\App\HR\Manager + { + return \$this->manager; + } + } + + PHP; + + self::assertEquals($expectedEmployee, $files['/generated/Employee.php']); + + $expectedManager = <<name = \$name; + \$this->department = \$department; + } + public function name(): string + { + return \$this->name; + } + public function department(): string + { + return \$this->department; + } + } + + PHP; + + self::assertEquals($expectedManager, $files['/generated/Manager.php']); + } + + #[Test] + public function record_with_map_of_records(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Library'); + + self::assertCount(2, $files); + self::assertArrayHasKey('/generated/Library.php', $files); + self::assertArrayHasKey('/generated/Book.php', $files); + + $expectedLibrary = << */ + private array \$books; + /** + * @param array \$books + */ + public function __construct(string \$name, array \$books) + { + \$this->name = \$name; + \$this->books = \$books; + } + public function name(): string + { + return \$this->name; + } + /** @return array */ + public function books(): array + { + return \$this->books; + } + } + + PHP; + + self::assertEquals($expectedLibrary, $files['/generated/Library.php']); + + $expectedBook = <<title = \$title; + \$this->pages = \$pages; + } + public function title(): string + { + return \$this->title; + } + public function pages(): int + { + return \$this->pages; + } + } + + PHP; + + self::assertEquals($expectedBook, $files['/generated/Book.php']); + } + + #[Test] + public function record_with_record_reuse_by_name(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Billing'); + + self::assertCount(2, $files); + self::assertArrayHasKey('/generated/Invoice.php', $files); + self::assertArrayHasKey('/generated/PostalAddress.php', $files); + + $expectedInvoice = <<id = \$id; + \$this->billingAddress = \$billingAddress; + \$this->shippingAddress = \$shippingAddress; + } + public function id(): int + { + return \$this->id; + } + public function billingAddress(): \App\Billing\PostalAddress + { + return \$this->billingAddress; + } + public function shippingAddress(): \App\Billing\PostalAddress + { + return \$this->shippingAddress; + } + } + + PHP; + + self::assertEquals($expectedInvoice, $files['/generated/Invoice.php']); + } + + #[Test] + public function record_with_array_default_value(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Config'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/Settings.php', $files); + + $expected = << */ + private array \$tags = []; + /** + * @param list \$tags + */ + public function __construct(array \$tags = []) + { + \$this->tags = \$tags; + } + /** @return list */ + public function tags(): array + { + return \$this->tags; + } + } + + PHP; + + self::assertEquals($expected, $files['/generated/Settings.php']); + } + + #[Test] + public function record_with_mixed_default_and_required_fields(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Inventory'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/Item.php', $files); + + $expected = <<name = \$name; + \$this->quantity = \$quantity; + \$this->description = \$description; + } + public function name(): string + { + return \$this->name; + } + public function quantity(): int + { + return \$this->quantity; + } + public function description(): string + { + return \$this->description; + } + } + + PHP; + + self::assertEquals($expected, $files['/generated/Item.php']); + } + + #[Test] + public function record_with_nullable_enum_field(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Tasks'); + + self::assertCount(2, $files); + self::assertArrayHasKey('/generated/Task.php', $files); + self::assertArrayHasKey('/generated/Priority.php', $files); + + $expectedTask = <<title = \$title; + \$this->priority = \$priority; + } + public function title(): string + { + return \$this->title; + } + public function priority(): null|\App\Tasks\Priority + { + return \$this->priority; + } + } + + PHP; + + self::assertEquals($expectedTask, $files['/generated/Task.php']); + + $expectedPriority = <<transpiler->translate($avroSchema, '/generated', 'App\\Reports'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/Report.php', $files); + + $expected = << */ + private null|array \$scores = null; + /** + * @param null|list \$scores + */ + public function __construct(string \$title, null|array \$scores = null) + { + \$this->title = \$title; + \$this->scores = \$scores; + } + public function title(): string + { + return \$this->title; + } + /** @return null|list */ + public function scores(): null|array + { + return \$this->scores; + } + } + + PHP; + + self::assertEquals($expected, $files['/generated/Report.php']); + } + + #[Test] + public function record_with_nullable_map_field(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\UI'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/Dashboard.php', $files); + + $expected = << */ + private null|array \$widgets = null; + /** + * @param null|array \$widgets + */ + public function __construct(null|array \$widgets = null) + { + \$this->widgets = \$widgets; + } + /** @return null|array */ + public function widgets(): null|array + { + return \$this->widgets; + } + } + + PHP; + + self::assertEquals($expected, $files['/generated/Dashboard.php']); + } + + #[Test] + public function record_with_nested_array_of_arrays(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Math'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/Matrix.php', $files); + + $expected = <<> */ + private array \$rows; + /** + * @param list> \$rows + */ + public function __construct(array \$rows) + { + \$this->rows = \$rows; + } + /** @return list> */ + public function rows(): array + { + return \$this->rows; + } + } + + PHP; + + self::assertEquals($expected, $files['/generated/Matrix.php']); + } + + #[Test] + public function record_with_map_of_arrays(): void + { + $schema = <<transpiler->translate($avroSchema, '/generated', 'App\\Search'); + + self::assertCount(1, $files); + self::assertArrayHasKey('/generated/Index.php', $files); + + $expected = <<> */ + private array \$entries; + /** + * @param array> \$entries + */ + public function __construct(array \$entries) + { + \$this->entries = \$entries; + } + /** @return array> */ + public function entries(): array + { + return \$this->entries; + } + } + + PHP; + + self::assertEquals($expected, $files['/generated/Index.php']); + } } From 5ce714985a6bb9795843a4b92f4c96227613f394 Mon Sep 17 00:00:00 2001 From: mattiabasone Date: Fri, 27 Mar 2026 16:28:52 +0100 Subject: [PATCH 3/4] renaming --- ...roTranspiler.php => AvroCodeGenerator.php} | 53 +++++- ...ion.php => AvroCodeGeneratorException.php} | 2 +- ...ilerTest.php => AvroCodeGeneratorTest.php} | 164 ++++++++++++++---- 3 files changed, 185 insertions(+), 34 deletions(-) rename lang/php/lib/Generator/{AvroTranspiler.php => AvroCodeGenerator.php} (84%) rename lang/php/lib/Generator/{AvroTranspilerException.php => AvroCodeGeneratorException.php} (94%) rename lang/php/test/Generator/{AvroTranspilerTest.php => AvroCodeGeneratorTest.php} (87%) diff --git a/lang/php/lib/Generator/AvroTranspiler.php b/lang/php/lib/Generator/AvroCodeGenerator.php similarity index 84% rename from lang/php/lib/Generator/AvroTranspiler.php rename to lang/php/lib/Generator/AvroCodeGenerator.php index 2bec01b4c52..cd12c4c5f73 100644 --- a/lang/php/lib/Generator/AvroTranspiler.php +++ b/lang/php/lib/Generator/AvroCodeGenerator.php @@ -31,10 +31,11 @@ use Apache\Avro\Schema\AvroUnionSchema; use PhpParser\BuilderFactory; use PhpParser\Node; +use PhpParser\Node\Scalar\String_; use PhpParser\Node\Stmt; use PhpParser\PrettyPrinter\Standard; -class AvroTranspiler +class AvroCodeGenerator { private BuilderFactory $factory; private Standard $printer; @@ -136,7 +137,7 @@ private function buildRecord( string $phpNamespace ): Node { $className = ucwords($avroRecord->name()); - $class = $this->factory->class($className)->makeFinal(); + $class = $this->factory->class($className)->makeFinal()->implement('\\JsonSerializable'); foreach ($avroRecord->fields() as $field) { $phpType = $this->avroTypeToPhp($field->type(), $phpNamespace); @@ -207,11 +208,57 @@ private function buildRecord( $class->addStmt($getter); } + $arrayItems = []; + foreach ($avroRecord->fields() as $field) { + $arrayItems[] = new Node\ArrayItem( + $this->buildJsonSerializeValue($field->type(), $field->name()), + new String_($field->name()) + ); + } + $jsonSerialize = $this->factory->method('jsonSerialize') + ->makePublic() + ->setReturnType('mixed') + ->addStmt( + new Stmt\Return_( + new Node\Expr\Array_($arrayItems, ['kind' => Node\Expr\Array_::KIND_SHORT]) + ) + ); + $class->addStmt($jsonSerialize); + return $this->factory->namespace($phpNamespace) ->addStmt($class) ->getNode(); } + /** + * Builds the expression used inside jsonSerialize() for a single field. + * + * - EnumSchema → $this->field->value (plain string for Avro + JSON) + * - union[null, Enum] → $this->field?->value (null-safe, still plain) + * - anything else → $this->field + */ + private function buildJsonSerializeValue(AvroSchema $fieldType, string $fieldName): Node\Expr + { + $propertyFetch = new Node\Expr\PropertyFetch(new Node\Expr\Variable('this'), $fieldName); + + if ($fieldType instanceof AvroEnumSchema) { + return new Node\Expr\PropertyFetch($propertyFetch, 'value'); + } + + if ($fieldType instanceof AvroUnionSchema) { + $nonNullSchemas = array_values(array_filter( + $fieldType->schemas(), + static fn (AvroSchema $s): bool => !($s instanceof AvroPrimitiveSchema && AvroSchema::NULL_TYPE === $s->type()) + )); + + if (1 === count($nonNullSchemas) && $nonNullSchemas[0] instanceof AvroEnumSchema) { + return new Node\Expr\NullsafePropertyFetch($propertyFetch, 'value'); + } + } + + return $propertyFetch; + } + /** * @param list $values */ @@ -254,7 +301,7 @@ private function avroPrimitiveTypeToPhp(AvroPrimitiveSchema $primitiveSchema): s AvroSchema::INT_TYPE, AvroSchema::LONG_TYPE => 'int', AvroSchema::FLOAT_TYPE, AvroSchema::DOUBLE_TYPE => 'float', AvroSchema::STRING_TYPE, AvroSchema::BYTES_TYPE => 'string', - default => throw new AvroTranspilerException("Unknown primitive type: ".$primitiveSchema->type()), + default => throw new AvroCodeGeneratorException("Unknown primitive type: ".$primitiveSchema->type()), }; } diff --git a/lang/php/lib/Generator/AvroTranspilerException.php b/lang/php/lib/Generator/AvroCodeGeneratorException.php similarity index 94% rename from lang/php/lib/Generator/AvroTranspilerException.php rename to lang/php/lib/Generator/AvroCodeGeneratorException.php index ad80640a9db..5a51754fc3d 100644 --- a/lang/php/lib/Generator/AvroTranspilerException.php +++ b/lang/php/lib/Generator/AvroCodeGeneratorException.php @@ -22,6 +22,6 @@ namespace Apache\Avro\Generator; -class AvroTranspilerException extends \Exception +class AvroCodeGeneratorException extends \Exception { } diff --git a/lang/php/test/Generator/AvroTranspilerTest.php b/lang/php/test/Generator/AvroCodeGeneratorTest.php similarity index 87% rename from lang/php/test/Generator/AvroTranspilerTest.php rename to lang/php/test/Generator/AvroCodeGeneratorTest.php index df0fdc05cbe..8473727795f 100644 --- a/lang/php/test/Generator/AvroTranspilerTest.php +++ b/lang/php/test/Generator/AvroCodeGeneratorTest.php @@ -22,18 +22,18 @@ namespace Apache\Avro\Tests\Generator; -use Apache\Avro\Generator\AvroTranspiler; +use Apache\Avro\Generator\AvroCodeGenerator; use Apache\Avro\Schema\AvroSchema; use PHPUnit\Framework\Attributes\Test; use PHPUnit\Framework\TestCase; -class AvroTranspilerTest extends TestCase +class AvroCodeGeneratorTest extends TestCase { - private AvroTranspiler $transpiler; + private AvroCodeGenerator $transpiler; public function setUp(): void { - $this->transpiler = new AvroTranspiler(); + $this->transpiler = new AvroCodeGenerator(); } #[Test] @@ -84,7 +84,7 @@ public function nested_schema_generation(): void namespace MyApp\Avro\Generated; - final class Lisp + final class Lisp implements \JsonSerializable { private null|string|\MyApp\Avro\Generated\Cons \$value; public function __construct(null|string|\MyApp\Avro\Generated\Cons \$value) @@ -95,6 +95,10 @@ public function value(): null|string|\MyApp\Avro\Generated\Cons { return \$this->value; } + public function jsonSerialize(): mixed + { + return ['value' => \$this->value]; + } } PHP; @@ -108,7 +112,7 @@ public function value(): null|string|\MyApp\Avro\Generated\Cons namespace MyApp\Avro\Generated; - final class Cons + final class Cons implements \JsonSerializable { private \MyApp\Avro\Generated\Lisp \$car; private \MyApp\Avro\Generated\Lisp \$cdr; @@ -125,6 +129,10 @@ public function cdr(): \MyApp\Avro\Generated\Lisp { return \$this->cdr; } + public function jsonSerialize(): mixed + { + return ['car' => \$this->car, 'cdr' => \$this->cdr]; + } } PHP; @@ -160,7 +168,7 @@ public function simple_record_with_primitive_types(): void namespace App\Model; - final class User + final class User implements \JsonSerializable { private string \$name; private int \$age; @@ -189,6 +197,10 @@ public function score(): float { return \$this->score; } + public function jsonSerialize(): mixed + { + return ['name' => \$this->name, 'age' => \$this->age, 'active' => \$this->active, 'score' => \$this->score]; + } } PHP; @@ -260,7 +272,7 @@ public function record_with_default_values(): void namespace App\Config; - final class Config + final class Config implements \JsonSerializable { private int \$retries = 3; private string \$label = 'default'; @@ -283,6 +295,10 @@ public function enabled(): bool { return \$this->enabled; } + public function jsonSerialize(): mixed + { + return ['retries' => \$this->retries, 'label' => \$this->label, 'enabled' => \$this->enabled]; + } } PHP; @@ -317,7 +333,7 @@ public function record_with_array_field(): void namespace App\Music; - final class Playlist + final class Playlist implements \JsonSerializable { private string \$name; /** @var list */ @@ -339,6 +355,10 @@ public function tags(): array { return \$this->tags; } + public function jsonSerialize(): mixed + { + return ['name' => \$this->name, 'tags' => \$this->tags]; + } } PHP; @@ -372,7 +392,7 @@ public function record_with_map_field(): void namespace App\Data; - final class Metadata + final class Metadata implements \JsonSerializable { /** @var array */ private array \$properties; @@ -388,6 +408,10 @@ public function properties(): array { return \$this->properties; } + public function jsonSerialize(): mixed + { + return ['properties' => \$this->properties]; + } } PHP; @@ -430,7 +454,7 @@ public function record_with_enum_field(): void namespace App\Vehicles; - final class Car + final class Car implements \JsonSerializable { private string \$brand; private \App\Vehicles\FuelType \$fuel; @@ -447,6 +471,10 @@ public function fuel(): \App\Vehicles\FuelType { return \$this->fuel; } + public function jsonSerialize(): mixed + { + return ['brand' => \$this->brand, 'fuel' => \$this->fuel->value]; + } } PHP; @@ -499,7 +527,7 @@ public function record_with_nullable_field(): void namespace App\Social; - final class Profile + final class Profile implements \JsonSerializable { private string \$username; private null|string \$bio = null; @@ -516,6 +544,10 @@ public function bio(): null|string { return \$this->bio; } + public function jsonSerialize(): mixed + { + return ['username' => \$this->username, 'bio' => \$this->bio]; + } } PHP; @@ -556,7 +588,7 @@ public function record_with_all_primitive_types(): void namespace App\Types; - final class AllTypes + final class AllTypes implements \JsonSerializable { private null \$nullField; private bool \$boolField; @@ -609,6 +641,10 @@ public function bytesField(): string { return \$this->bytesField; } + public function jsonSerialize(): mixed + { + return ['nullField' => \$this->nullField, 'boolField' => \$this->boolField, 'intField' => \$this->intField, 'longField' => \$this->longField, 'floatField' => \$this->floatField, 'doubleField' => \$this->doubleField, 'stringField' => \$this->stringField, 'bytesField' => \$this->bytesField]; + } } PHP; @@ -657,7 +693,7 @@ public function record_with_nested_array_of_records(): void namespace App\Org; - final class Team + final class Team implements \JsonSerializable { private string \$name; /** @var list<\App\Org\Member> */ @@ -679,6 +715,10 @@ public function members(): array { return \$this->members; } + public function jsonSerialize(): mixed + { + return ['name' => \$this->name, 'members' => \$this->members]; + } } PHP; @@ -692,7 +732,7 @@ public function members(): array namespace App\Org; - final class Member + final class Member implements \JsonSerializable { private string \$name; private string \$role; @@ -709,6 +749,10 @@ public function role(): string { return \$this->role; } + public function jsonSerialize(): mixed + { + return ['name' => \$this->name, 'role' => \$this->role]; + } } PHP; @@ -742,7 +786,7 @@ public function record_with_multiple_union_types(): void namespace App\Events; - final class Event + final class Event implements \JsonSerializable { private null|string|int|bool \$payload; public function __construct(null|string|int|bool \$payload) @@ -753,6 +797,10 @@ public function payload(): null|string|int|bool { return \$this->payload; } + public function jsonSerialize(): mixed + { + return ['payload' => \$this->payload]; + } } PHP; @@ -798,7 +846,7 @@ public function record_with_nested_record_field(): void namespace App\Shop; - final class Order + final class Order implements \JsonSerializable { private int \$id; private \App\Shop\Address \$address; @@ -815,6 +863,10 @@ public function address(): \App\Shop\Address { return \$this->address; } + public function jsonSerialize(): mixed + { + return ['id' => \$this->id, 'address' => \$this->address]; + } } PHP; @@ -828,7 +880,7 @@ public function address(): \App\Shop\Address namespace App\Shop; - final class Address + final class Address implements \JsonSerializable { private string \$street; private string \$city; @@ -845,6 +897,10 @@ public function city(): string { return \$this->city; } + public function jsonSerialize(): mixed + { + return ['street' => \$this->street, 'city' => \$this->city]; + } } PHP; @@ -928,7 +984,7 @@ public function record_with_nullable_record_field(): void namespace App\HR; - final class Employee + final class Employee implements \JsonSerializable { private string \$name; private null|\App\HR\Manager \$manager = null; @@ -945,6 +1001,10 @@ public function manager(): null|\App\HR\Manager { return \$this->manager; } + public function jsonSerialize(): mixed + { + return ['name' => \$this->name, 'manager' => \$this->manager]; + } } PHP; @@ -958,7 +1018,7 @@ public function manager(): null|\App\HR\Manager namespace App\HR; - final class Manager + final class Manager implements \JsonSerializable { private string \$name; private string \$department; @@ -975,6 +1035,10 @@ public function department(): string { return \$this->department; } + public function jsonSerialize(): mixed + { + return ['name' => \$this->name, 'department' => \$this->department]; + } } PHP; @@ -1023,7 +1087,7 @@ public function record_with_map_of_records(): void namespace App\Library; - final class Library + final class Library implements \JsonSerializable { private string \$name; /** @var array */ @@ -1045,6 +1109,10 @@ public function books(): array { return \$this->books; } + public function jsonSerialize(): mixed + { + return ['name' => \$this->name, 'books' => \$this->books]; + } } PHP; @@ -1058,7 +1126,7 @@ public function books(): array namespace App\Library; - final class Book + final class Book implements \JsonSerializable { private string \$title; private int \$pages; @@ -1075,6 +1143,10 @@ public function pages(): int { return \$this->pages; } + public function jsonSerialize(): mixed + { + return ['title' => \$this->title, 'pages' => \$this->pages]; + } } PHP; @@ -1124,7 +1196,7 @@ public function record_with_record_reuse_by_name(): void namespace App\Billing; - final class Invoice + final class Invoice implements \JsonSerializable { private int \$id; private \App\Billing\PostalAddress \$billingAddress; @@ -1147,6 +1219,10 @@ public function shippingAddress(): \App\Billing\PostalAddress { return \$this->shippingAddress; } + public function jsonSerialize(): mixed + { + return ['id' => \$this->id, 'billingAddress' => \$this->billingAddress, 'shippingAddress' => \$this->shippingAddress]; + } } PHP; @@ -1180,7 +1256,7 @@ public function record_with_array_default_value(): void namespace App\Config; - final class Settings + final class Settings implements \JsonSerializable { /** @var list */ private array \$tags = []; @@ -1196,6 +1272,10 @@ public function tags(): array { return \$this->tags; } + public function jsonSerialize(): mixed + { + return ['tags' => \$this->tags]; + } } PHP; @@ -1231,7 +1311,7 @@ public function record_with_mixed_default_and_required_fields(): void namespace App\Inventory; - final class Item + final class Item implements \JsonSerializable { private string \$name; private int \$quantity = 1; @@ -1254,6 +1334,10 @@ public function description(): string { return \$this->description; } + public function jsonSerialize(): mixed + { + return ['name' => \$this->name, 'quantity' => \$this->quantity, 'description' => \$this->description]; + } } PHP; @@ -1300,7 +1384,7 @@ public function record_with_nullable_enum_field(): void namespace App\Tasks; - final class Task + final class Task implements \JsonSerializable { private string \$title; private null|\App\Tasks\Priority \$priority = null; @@ -1317,6 +1401,10 @@ public function priority(): null|\App\Tasks\Priority { return \$this->priority; } + public function jsonSerialize(): mixed + { + return ['title' => \$this->title, 'priority' => \$this->priority?->value]; + } } PHP; @@ -1373,7 +1461,7 @@ public function record_with_nullable_array_field(): void namespace App\Reports; - final class Report + final class Report implements \JsonSerializable { private string \$title; /** @var null|list */ @@ -1395,6 +1483,10 @@ public function scores(): null|array { return \$this->scores; } + public function jsonSerialize(): mixed + { + return ['title' => \$this->title, 'scores' => \$this->scores]; + } } PHP; @@ -1432,7 +1524,7 @@ public function record_with_nullable_map_field(): void namespace App\UI; - final class Dashboard + final class Dashboard implements \JsonSerializable { /** @var null|array */ private null|array \$widgets = null; @@ -1448,6 +1540,10 @@ public function widgets(): null|array { return \$this->widgets; } + public function jsonSerialize(): mixed + { + return ['widgets' => \$this->widgets]; + } } PHP; @@ -1490,7 +1586,7 @@ public function record_with_nested_array_of_arrays(): void namespace App\Math; - final class Matrix + final class Matrix implements \JsonSerializable { /** @var list> */ private array \$rows; @@ -1506,6 +1602,10 @@ public function rows(): array { return \$this->rows; } + public function jsonSerialize(): mixed + { + return ['rows' => \$this->rows]; + } } PHP; @@ -1548,7 +1648,7 @@ public function record_with_map_of_arrays(): void namespace App\Search; - final class Index + final class Index implements \JsonSerializable { /** @var array> */ private array \$entries; @@ -1564,6 +1664,10 @@ public function entries(): array { return \$this->entries; } + public function jsonSerialize(): mixed + { + return ['entries' => \$this->entries]; + } } PHP; From 8147d94abd306cad38001b4d51f8fe36a0bf1b2a Mon Sep 17 00:00:00 2001 From: mattiabasone Date: Fri, 10 Apr 2026 17:31:25 +0200 Subject: [PATCH 4/4] add AvroSpecificDatumWriter and tests + fixtures --- .../php/lib/Datum/AvroSpecificDatumWriter.php | 236 ++++++++ .../Datum/AvroSpecificDatumWriterTest.php | 533 ++++++++++++++++++ lang/php/test/Fixtures/Generated/Address.php | 50 ++ lang/php/test/Fixtures/Generated/Car.php | 50 ++ lang/php/test/Fixtures/Generated/FuelType.php | 30 + lang/php/test/Fixtures/Generated/Member.php | 50 ++ lang/php/test/Fixtures/Generated/Metadata.php | 48 ++ lang/php/test/Fixtures/Generated/Order.php | 50 ++ lang/php/test/Fixtures/Generated/Priority.php | 30 + lang/php/test/Fixtures/Generated/Profile.php | 50 ++ lang/php/test/Fixtures/Generated/Task.php | 50 ++ lang/php/test/Fixtures/Generated/Team.php | 55 ++ lang/php/test/Fixtures/Generated/User.php | 64 +++ 13 files changed, 1296 insertions(+) create mode 100644 lang/php/lib/Datum/AvroSpecificDatumWriter.php create mode 100644 lang/php/test/Datum/AvroSpecificDatumWriterTest.php create mode 100644 lang/php/test/Fixtures/Generated/Address.php create mode 100644 lang/php/test/Fixtures/Generated/Car.php create mode 100644 lang/php/test/Fixtures/Generated/FuelType.php create mode 100644 lang/php/test/Fixtures/Generated/Member.php create mode 100644 lang/php/test/Fixtures/Generated/Metadata.php create mode 100644 lang/php/test/Fixtures/Generated/Order.php create mode 100644 lang/php/test/Fixtures/Generated/Priority.php create mode 100644 lang/php/test/Fixtures/Generated/Profile.php create mode 100644 lang/php/test/Fixtures/Generated/Task.php create mode 100644 lang/php/test/Fixtures/Generated/Team.php create mode 100644 lang/php/test/Fixtures/Generated/User.php diff --git a/lang/php/lib/Datum/AvroSpecificDatumWriter.php b/lang/php/lib/Datum/AvroSpecificDatumWriter.php new file mode 100644 index 00000000000..44c0a21cac1 --- /dev/null +++ b/lang/php/lib/Datum/AvroSpecificDatumWriter.php @@ -0,0 +1,236 @@ +value holds the Avro symbol string. + * + * Usage: + * $schema = AvroSchema::parse($json); + * $writer = new AvroSpecificDatumWriter($schema); + * $io = new \Apache\Avro\IO\AvroStringIO(); + * $encoder = new AvroIOBinaryEncoder($io); + * $writer->write($myGeneratedObject, $encoder); + * $bytes = $io->string(); + */ +class AvroSpecificDatumWriter +{ + public function __construct( + private readonly AvroSchema $writersSchema + ) { + } + + /** + * Serializes the given datum (a generated record instance) to the encoder. + * + * @throws AvroException + */ + public function write(object $datum, AvroIOBinaryEncoder $encoder): void + { + $this->writeData($this->writersSchema, $datum, $encoder); + } + + /** + * @throws AvroException + */ + private function writeData(AvroSchema $schema, mixed $datum, AvroIOBinaryEncoder $encoder): void + { + match (true) { + $schema instanceof AvroRecordSchema => $this->writeRecord($schema, $datum, $encoder), + $schema instanceof AvroEnumSchema => $this->writeEnum($schema, $datum, $encoder), + $schema instanceof AvroArraySchema => $this->writeArray($schema, $datum, $encoder), + $schema instanceof AvroMapSchema => $this->writeMap($schema, $datum, $encoder), + $schema instanceof AvroUnionSchema => $this->writeUnion($schema, $datum, $encoder), + $schema instanceof AvroPrimitiveSchema => $this->writePrimitive($schema, $datum, $encoder), + default => throw new AvroException(sprintf('Unsupported schema type: %s', $schema->type())), + }; + } + + /** + * Writes a record by calling the getter for each field defined in the schema. + * + * @throws AvroException + */ + private function writeRecord(AvroRecordSchema $schema, object $datum, AvroIOBinaryEncoder $encoder): void + { + foreach ($schema->fields() as $field) { + $value = $datum->{$field->name()}(); + $this->writeData($field->type(), $value, $encoder); + } + } + + /** + * Writes a backed enum value by looking up its symbol index. + * + * @throws AvroException + */ + private function writeEnum(AvroEnumSchema $schema, \BackedEnum $datum, AvroIOBinaryEncoder $encoder): void + { + $symbolIndex = $schema->symbolIndex($datum->value); + $encoder->writeInt($symbolIndex); + } + + /** + * @param list $datum + * + * @throws AvroException + */ + private function writeArray(AvroArraySchema $schema, array $datum, AvroIOBinaryEncoder $encoder): void + { + $count = count($datum); + if ($count > 0) { + $encoder->writeLong($count); + foreach ($datum as $item) { + $this->writeData($schema->items(), $item, $encoder); + } + } + $encoder->writeLong(0); + } + + /** + * @param array $datum + * + * @throws AvroException + */ + private function writeMap(AvroMapSchema $schema, array $datum, AvroIOBinaryEncoder $encoder): void + { + $count = count($datum); + if ($count > 0) { + $encoder->writeLong($count); + foreach ($datum as $key => $value) { + $encoder->writeString((string) $key); + $this->writeData($schema->values(), $value, $encoder); + } + } + $encoder->writeLong(0); + } + + /** + * Writes a union value by finding the matching branch schema. + * + * @throws AvroIOTypeException if no branch matches the datum + * @throws AvroException + */ + private function writeUnion(AvroUnionSchema $schema, mixed $datum, AvroIOBinaryEncoder $encoder): void + { + $matchedIndex = null; + $matchedSchema = null; + + foreach ($schema->schemas() as $index => $branchSchema) { + if ($this->datumMatchesSchema($branchSchema, $datum)) { + $matchedIndex = $index; + $matchedSchema = $branchSchema; + + break; + } + } + + if (null === $matchedSchema) { + throw new AvroIOTypeException($schema, $datum); + } + + $encoder->writeLong($matchedIndex); + $this->writeData($matchedSchema, $datum, $encoder); + } + + /** + * Writes a primitive value using the appropriate encoder method. + * + * @throws AvroException + */ + private function writePrimitive(AvroPrimitiveSchema $schema, mixed $datum, AvroIOBinaryEncoder $encoder): void + { + match ($schema->type()) { + AvroSchema::NULL_TYPE => $encoder->writeNull($datum), + AvroSchema::BOOLEAN_TYPE => $encoder->writeBoolean($datum), + AvroSchema::INT_TYPE => $encoder->writeInt($datum), + AvroSchema::LONG_TYPE => $encoder->writeLong($datum), + AvroSchema::FLOAT_TYPE => $encoder->writeFloat($datum), + AvroSchema::DOUBLE_TYPE => $encoder->writeDouble($datum), + AvroSchema::STRING_TYPE => $encoder->writeString($datum), + AvroSchema::BYTES_TYPE => $encoder->writeBytes($datum), + default => throw new AvroException(sprintf('Unknown primitive type: %s', $schema->type())), + }; + } + + /** + * Determines whether the given datum matches the given schema branch. + * Used by writeUnion() to find the correct branch index. + */ + private function datumMatchesSchema(AvroSchema $schema, mixed $datum): bool + { + return match (true) { + $schema instanceof AvroPrimitiveSchema => $this->datumMatchesPrimitive($schema, $datum), + $schema instanceof AvroEnumSchema => $datum instanceof \BackedEnum + && $this->classNameMatchesSchema($datum, $schema->name()), + $schema instanceof AvroRecordSchema => is_object($datum) + && !($datum instanceof \BackedEnum) + && $this->classNameMatchesSchema($datum, $schema->name()), + $schema instanceof AvroArraySchema => is_array($datum) + && ([] === $datum || array_is_list($datum)), + $schema instanceof AvroMapSchema => is_array($datum), + default => false, + }; + } + + private function datumMatchesPrimitive(AvroPrimitiveSchema $schema, mixed $datum): bool + { + return match ($schema->type()) { + AvroSchema::NULL_TYPE => null === $datum, + AvroSchema::BOOLEAN_TYPE => is_bool($datum), + AvroSchema::INT_TYPE => is_int($datum) + && $datum >= AvroSchema::INT_MIN_VALUE + && $datum <= AvroSchema::INT_MAX_VALUE, + AvroSchema::LONG_TYPE => is_int($datum), + AvroSchema::FLOAT_TYPE, AvroSchema::DOUBLE_TYPE => is_float($datum) || is_int($datum), + AvroSchema::STRING_TYPE, AvroSchema::BYTES_TYPE => is_string($datum), + default => false, + }; + } + + /** + * Checks whether the short class name of the datum matches the Avro schema name. + * Generated classes use ucwords(schemaName) as the class name. + */ + private function classNameMatchesSchema(object $datum, string $schemaName): bool + { + $className = (new \ReflectionClass($datum))->getShortName(); + + return 0 === strcasecmp($className, $schemaName); + } +} diff --git a/lang/php/test/Datum/AvroSpecificDatumWriterTest.php b/lang/php/test/Datum/AvroSpecificDatumWriterTest.php new file mode 100644 index 00000000000..366f3b117e2 --- /dev/null +++ b/lang/php/test/Datum/AvroSpecificDatumWriterTest.php @@ -0,0 +1,533 @@ +roundTrip($schema, $user); + + self::assertSame('Alice', $result['name']); + self::assertSame(30, $result['age']); + self::assertTrue($result['active']); + self::assertEqualsWithDelta(9.5, $result['score'], 0.001); + } + + #[Test] + public function record_with_enum_field(): void + { + $schema = AvroSchema::parse(<<roundTrip($schema, $car); + + self::assertSame('Tesla', $result['brand']); + self::assertSame('electric', $result['fuel']); + } + + #[Test] + public function record_with_nested_record(): void + { + $schema = AvroSchema::parse(<<roundTrip($schema, $order); + + self::assertSame(42, $result['id']); + self::assertSame('123 Main St', $result['address']['street']); + self::assertSame('Springfield', $result['address']['city']); + } + + #[Test] + public function record_with_nullable_field_present(): void + { + $schema = AvroSchema::parse(<<roundTrip($schema, $profile); + + self::assertSame('bob', $result['username']); + self::assertSame('Hello world', $result['bio']); + } + + #[Test] + public function record_with_nullable_field_null(): void + { + $schema = AvroSchema::parse(<<roundTrip($schema, $profile); + + self::assertSame('bob', $result['username']); + self::assertNull($result['bio']); + } + + #[Test] + public function record_with_nullable_enum_present(): void + { + $schema = AvroSchema::parse(<<roundTrip($schema, $task); + + self::assertSame('Fix bug', $result['title']); + self::assertSame('high', $result['priority']); + } + + #[Test] + public function record_with_nullable_enum_null(): void + { + $schema = AvroSchema::parse(<<roundTrip($schema, $task); + + self::assertSame('No priority', $result['title']); + self::assertNull($result['priority']); + } + + #[Test] + public function record_with_array_of_records(): void + { + $schema = AvroSchema::parse(<<roundTrip($schema, $team); + + self::assertSame('Engineering', $result['name']); + self::assertCount(2, $result['members']); + self::assertSame('Alice', $result['members'][0]['name']); + self::assertSame('Lead', $result['members'][0]['role']); + self::assertSame('Bob', $result['members'][1]['name']); + self::assertSame('Developer', $result['members'][1]['role']); + } + + #[Test] + public function record_with_empty_array(): void + { + $schema = AvroSchema::parse(<<roundTrip($schema, $team); + + self::assertSame('Empty Team', $result['name']); + self::assertSame([], $result['members']); + } + + #[Test] + public function record_with_map_field(): void + { + $schema = AvroSchema::parse(<< 'production', 'version' => '1.2.3']); + $result = $this->roundTrip($schema, $metadata); + + self::assertSame('production', $result['properties']['env']); + self::assertSame('1.2.3', $result['properties']['version']); + } + + #[Test] + public function record_with_empty_map(): void + { + $schema = AvroSchema::parse(<<roundTrip($schema, $metadata); + + self::assertSame([], $result['properties']); + } + + #[Test] + public function produces_same_bytes_as_generic_writer(): void + { + $schema = AvroSchema::parse(<<write($user, $specificEncoder); + + // Generic writer: from associative array + $genericWriter = new AvroIODatumWriter($schema); + $genericIo = new AvroStringIO(); + $genericEncoder = new AvroIOBinaryEncoder($genericIo); + $genericWriter->write( + ['name' => 'Alice', 'age' => 30, 'active' => true, 'score' => 9.5], + $genericEncoder + ); + + self::assertSame($genericIo->string(), $specificIo->string()); + } + + #[Test] + public function produces_same_bytes_for_enum_as_generic_writer(): void + { + $schema = AvroSchema::parse(<<write($car, new AvroIOBinaryEncoder($specificIo)); + + // Generic + $genericWriter = new AvroIODatumWriter($schema); + $genericIo = new AvroStringIO(); + $genericWriter->write( + ['brand' => 'BMW', 'fuel' => 'diesel'], + new AvroIOBinaryEncoder($genericIo) + ); + + self::assertSame($genericIo->string(), $specificIo->string()); + } + + #[Test] + public function produces_same_bytes_for_nested_record_as_generic_writer(): void + { + $schema = AvroSchema::parse(<<write($order, new AvroIOBinaryEncoder($specificIo)); + + // Generic + $genericWriter = new AvroIODatumWriter($schema); + $genericIo = new AvroStringIO(); + $genericWriter->write( + ['id' => 99, 'address' => ['street' => 'Oak Ave', 'city' => 'Riverside']], + new AvroIOBinaryEncoder($genericIo) + ); + + self::assertSame($genericIo->string(), $specificIo->string()); + } + + #[Test] + public function produces_same_bytes_for_nullable_union_as_generic_writer(): void + { + $schema = AvroSchema::parse(<<write($profile, new AvroIOBinaryEncoder($specificIo)); + + $genericWriter = new AvroIODatumWriter($schema); + $genericIo = new AvroStringIO(); + $genericWriter->write( + ['username' => 'alice', 'bio' => 'Bio text'], + new AvroIOBinaryEncoder($genericIo) + ); + + self::assertSame($genericIo->string(), $specificIo->string()); + + // With null + $profileNull = new Profile('bob'); + $specificIo2 = new AvroStringIO(); + $specificWriter->write($profileNull, new AvroIOBinaryEncoder($specificIo2)); + + $genericIo2 = new AvroStringIO(); + $genericWriter->write( + ['username' => 'bob', 'bio' => null], + new AvroIOBinaryEncoder($genericIo2) + ); + + self::assertSame($genericIo2->string(), $specificIo2->string()); + } + + /** + * Helper: serialize with AvroSpecificDatumWriter, then deserialize + * with AvroIODatumReader to get back an associative array. + */ + private function roundTrip(AvroSchema $schema, object $datum): mixed + { + // Serialize + $writer = new AvroSpecificDatumWriter($schema); + $io = new AvroStringIO(); + $encoder = new AvroIOBinaryEncoder($io); + $writer->write($datum, $encoder); + + // Deserialize + $io->seek(0); + $reader = new AvroIODatumReader($schema); + $decoder = new AvroIOBinaryDecoder($io); + + return $reader->read($decoder); + } +} diff --git a/lang/php/test/Fixtures/Generated/Address.php b/lang/php/test/Fixtures/Generated/Address.php new file mode 100644 index 00000000000..1f2abb7985d --- /dev/null +++ b/lang/php/test/Fixtures/Generated/Address.php @@ -0,0 +1,50 @@ +street = $street; + $this->city = $city; + } + + public function street(): string + { + return $this->street; + } + + public function city(): string + { + return $this->city; + } + + public function jsonSerialize(): mixed + { + return ['street' => $this->street, 'city' => $this->city]; + } +} diff --git a/lang/php/test/Fixtures/Generated/Car.php b/lang/php/test/Fixtures/Generated/Car.php new file mode 100644 index 00000000000..d9e223aa770 --- /dev/null +++ b/lang/php/test/Fixtures/Generated/Car.php @@ -0,0 +1,50 @@ +brand = $brand; + $this->fuel = $fuel; + } + + public function brand(): string + { + return $this->brand; + } + + public function fuel(): FuelType + { + return $this->fuel; + } + + public function jsonSerialize(): mixed + { + return ['brand' => $this->brand, 'fuel' => $this->fuel->value]; + } +} diff --git a/lang/php/test/Fixtures/Generated/FuelType.php b/lang/php/test/Fixtures/Generated/FuelType.php new file mode 100644 index 00000000000..5ca9cbdd87a --- /dev/null +++ b/lang/php/test/Fixtures/Generated/FuelType.php @@ -0,0 +1,30 @@ +name = $name; + $this->role = $role; + } + + public function name(): string + { + return $this->name; + } + + public function role(): string + { + return $this->role; + } + + public function jsonSerialize(): mixed + { + return ['name' => $this->name, 'role' => $this->role]; + } +} diff --git a/lang/php/test/Fixtures/Generated/Metadata.php b/lang/php/test/Fixtures/Generated/Metadata.php new file mode 100644 index 00000000000..4c91b566952 --- /dev/null +++ b/lang/php/test/Fixtures/Generated/Metadata.php @@ -0,0 +1,48 @@ + */ + private array $properties; + + /** + * @param array $properties + */ + public function __construct(array $properties) + { + $this->properties = $properties; + } + + /** @return array */ + public function properties(): array + { + return $this->properties; + } + + public function jsonSerialize(): mixed + { + return ['properties' => $this->properties]; + } +} diff --git a/lang/php/test/Fixtures/Generated/Order.php b/lang/php/test/Fixtures/Generated/Order.php new file mode 100644 index 00000000000..b4fee35c8fe --- /dev/null +++ b/lang/php/test/Fixtures/Generated/Order.php @@ -0,0 +1,50 @@ +id = $id; + $this->address = $address; + } + + public function id(): int + { + return $this->id; + } + + public function address(): Address + { + return $this->address; + } + + public function jsonSerialize(): mixed + { + return ['id' => $this->id, 'address' => $this->address]; + } +} diff --git a/lang/php/test/Fixtures/Generated/Priority.php b/lang/php/test/Fixtures/Generated/Priority.php new file mode 100644 index 00000000000..34b7f1d5823 --- /dev/null +++ b/lang/php/test/Fixtures/Generated/Priority.php @@ -0,0 +1,30 @@ +username = $username; + $this->bio = $bio; + } + + public function username(): string + { + return $this->username; + } + + public function bio(): ?string + { + return $this->bio; + } + + public function jsonSerialize(): mixed + { + return ['username' => $this->username, 'bio' => $this->bio]; + } +} diff --git a/lang/php/test/Fixtures/Generated/Task.php b/lang/php/test/Fixtures/Generated/Task.php new file mode 100644 index 00000000000..60adfb44a89 --- /dev/null +++ b/lang/php/test/Fixtures/Generated/Task.php @@ -0,0 +1,50 @@ +title = $title; + $this->priority = $priority; + } + + public function title(): string + { + return $this->title; + } + + public function priority(): ?Priority + { + return $this->priority; + } + + public function jsonSerialize(): mixed + { + return ['title' => $this->title, 'priority' => $this->priority?->value]; + } +} diff --git a/lang/php/test/Fixtures/Generated/Team.php b/lang/php/test/Fixtures/Generated/Team.php new file mode 100644 index 00000000000..1d16dfa9c51 --- /dev/null +++ b/lang/php/test/Fixtures/Generated/Team.php @@ -0,0 +1,55 @@ + */ + private array $members; + + /** + * @param list $members + */ + public function __construct(string $name, array $members) + { + $this->name = $name; + $this->members = $members; + } + + public function name(): string + { + return $this->name; + } + + /** @return list */ + public function members(): array + { + return $this->members; + } + + public function jsonSerialize(): mixed + { + return ['name' => $this->name, 'members' => $this->members]; + } +} diff --git a/lang/php/test/Fixtures/Generated/User.php b/lang/php/test/Fixtures/Generated/User.php new file mode 100644 index 00000000000..b328ebbd627 --- /dev/null +++ b/lang/php/test/Fixtures/Generated/User.php @@ -0,0 +1,64 @@ +name = $name; + $this->age = $age; + $this->active = $active; + $this->score = $score; + } + + public function name(): string + { + return $this->name; + } + + public function age(): int + { + return $this->age; + } + + public function active(): bool + { + return $this->active; + } + + public function score(): float + { + return $this->score; + } + + public function jsonSerialize(): mixed + { + return ['name' => $this->name, 'age' => $this->age, 'active' => $this->active, 'score' => $this->score]; + } +}