Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
6ec71fe
AVRO-4232: [Python] Modernize the Python tooling
martin-g Feb 18, 2026
9902208
Ignore uv.lock for ASFv2 license check
martin-g Feb 18, 2026
d81a4a7
Add librt as a dev dependency
martin-g Feb 18, 2026
a411989
Drop librt as dev-dep. It fails at CI with another error
martin-g Feb 18, 2026
9a2ff4e
Do not test Python 3.9 since it is not supported
martin-g Feb 18, 2026
34eae25
Try with mypy<1.19
martin-g Feb 18, 2026
a427cfb
Use uv for the Java interop tests too
martin-g Feb 18, 2026
d401b80
Remove the flake8 settings. Now we use ruff
martin-g Feb 18, 2026
9e71d4b
Update uv.lock
martin-g Feb 18, 2026
d73daa5
Set cwd to lang/py for `uv sync`. Do not use uv for C# scripts
martin-g Feb 18, 2026
cb2b731
Update the usage. Wrap variables in quotes
martin-g Feb 18, 2026
7023260
Move more entries from setup.cfg to pyproject.toml
martin-g Feb 18, 2026
7ff957b
Remove typechecks dependency. It seems to be tox specific
martin-g Feb 18, 2026
f246a65
Do not shadow Python built-in `type`
martin-g Feb 18, 2026
cdedb56
Use `--frozen` for `uv sync` in CI
martin-g Feb 18, 2026
78f3a5c
Update setup-uv Github action to v7
martin-g Feb 18, 2026
e462824
Fix the package name for AvroException
martin-g Feb 18, 2026
e0484b3
First declare the local variable and then assign it a value
martin-g Feb 18, 2026
3c7c8c7
Use Python 3.10 for mypy
martin-g Feb 18, 2026
aeb32b9
Move "scripts" from setup.cfg to pyproject.toml
martin-g Feb 18, 2026
c15c680
Re-add the optional dependencies from setup.cfg to pyproject.toml
martin-g Feb 18, 2026
4724e89
Re-add the package-data from setup.cfg to pyproject.toml
martin-g Feb 18, 2026
91747aa
Use SHA for the external actions
martin-g Feb 19, 2026
712fde3
No need to call `build.sh` through `uv run`
martin-g Feb 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 32 additions & 30 deletions .github/workflows/test-lang-py.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

name: 'Test Python'
name: "Test Python"
on:
workflow_dispatch:
push:
branches: [ main ]
branches: [main]
pull_request:
branches: [ main ]
branches: [main]
paths:
- .github/workflows/test-lang-py.yml
- lang/py/**
- .github/workflows/test-lang-py.yml
- lang/py/**

defaults:
run:
Expand All @@ -38,16 +38,17 @@ jobs:
strategy:
fail-fast: false
matrix:
os:
os:
- ubuntu-latest
- ubuntu-24.04-arm
python:
- '3.13'
- '3.12'
- '3.11'
- '3.10'
- 'pypy-3.11'
- 'pypy-3.10'
- "3.14"
- "3.13"
- "3.12"
- "3.11"
- "3.10"
- "pypy-3.11"
- "pypy-3.10"

steps:
- uses: actions/checkout@v6
Expand All @@ -57,6 +58,9 @@ jobs:
with:
python-version: ${{ matrix.python }}

- name: Setup uv
uses: astral-sh/setup-uv@v6

- name: Apt Install Compression Libs
run: |
sudo apt-get update && \
Expand All @@ -67,33 +71,30 @@ jobs:
libzstd-dev

- name: Install Dependencies
run: |
python3 -m pip install --upgrade pip setuptools tox
run: uv sync

- name: Lint
if: ${{ matrix.python == '3.10' }}
run: python3 -m tox -e lint
run: uv run ./build.sh lint

- name: Typechecks
if: ${{ matrix.python == '3.10' }}
run: python3 -m tox -e typechecks
run: uv run ./build.sh typechecks

- name: Test
run: python3 -m tox -e py
run: uv run ./build.sh test

interop:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os:
os:
- ubuntu-latest
- ubuntu-24.04-arm
python:
- '3.11'
- '3.10'
- '3.9'
- 'pypy-3.10'
- "3.13"
- "3.12"

steps:
- uses: actions/checkout@v6
Expand All @@ -103,6 +104,9 @@ jobs:
with:
python-version: ${{ matrix.python }}

- name: Setup uv
uses: astral-sh/setup-uv@v6

- name: Apt Install Compression Libs
run: |
sudo apt-get update && \
Expand All @@ -113,9 +117,7 @@ jobs:
libzstd-dev

- name: Install Dependencies
run: |
python3 -m pip install --upgrade pip setuptools tox
python3 -m pip install python-snappy zstandard
run: uv sync

- name: Cache Local Maven Repository
uses: actions/cache@v5
Expand All @@ -125,17 +127,17 @@ jobs:
restore-keys: |
${{ runner.os }}-maven-

- name: 'Setup Temurin JDK 8, 11, 17 & 21'
- name: "Setup Temurin JDK 8, 11, 17 & 21"
uses: actions/setup-java@v5
with:
distribution: 'temurin'
distribution: "temurin"
java-version: |
8
11
17
21

- name: 'Setup Maven 3.9.11'
- name: "Setup Maven 3.9.11"
uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
with:
maven-version: 3.9.11
Expand All @@ -153,7 +155,7 @@ jobs:
run: mvn -B -P interop-data-generate generate-resources

- name: Generate Interop Data
run: ./build.sh interop-data-generate
run: uv run ./build.sh interop-data-generate

- name: Run Interop Tests
run: ./build.sh interop-data-test
run: uv run ./build.sh interop-data-test
2 changes: 1 addition & 1 deletion lang/py/avro/ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def respond(self, call_request):
else:
writers_schema = local_message.errors
self.write_error(writers_schema, error, buffer_encoder)
except schema.AvroException as e:
except avro.schema.AvroException as e:
error = avro.errors.AvroRemoteException(str(e))
buffer_encoder = avro.io.BinaryEncoder(io.BytesIO())
META_WRITER.write(response_metadata, buffer_encoder)
Expand Down
13 changes: 10 additions & 3 deletions lang/py/avro/test/sample_http_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,18 @@
{
"name": "Message",
"type": "record",
"fields": [{"name": "to", "type": "string"}, {"name": "from", "type": "string"}, {"name": "body", "type": "string"}],
"fields": [
{"name": "to", "type": "string"},
{"name": "from", "type": "string"},
{"name": "body", "type": "string"},
],
}
],
"messages": {
"send": {"request": [{"name": "message", "type": "Message"}], "response": "string"},
"send": {
"request": [{"name": "message", "type": "Message"}],
"response": "string",
},
"replay": {"request": [], "response": "string"},
},
}
Expand Down Expand Up @@ -71,7 +78,7 @@ def do_POST(self) -> None:


def main():
mail_server = http_server.HTTPServer(SERVER_ADDRESS, MailHandler)
mail_server = http.server.HTTPServer(SERVER_ADDRESS, MailHandler)
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea how this worked before

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It probably didn't. Is there any evidence this code path was ever covered by a test?

mail_server.allow_reuse_address = True
mail_server.serve_forever()

Expand Down
84 changes: 68 additions & 16 deletions lang/py/avro/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,25 @@ class DefaultValueTestCaseType(TypedDict):
{"value": {"car": {"value": "head"}, "cdr": {"value": None}}},
),
(
{"type": "record", "name": "record", "fields": [{"name": "value", "type": "int"}, {"name": "next", "type": ["null", "record"]}]},
{
"type": "record",
"name": "record",
"fields": [
{"name": "value", "type": "int"},
{"name": "next", "type": ["null", "record"]},
],
},
{"value": 0, "next": {"value": 1, "next": None}},
),
(
{"type": "record", "name": "ns.long", "fields": [{"name": "value", "type": "int"}, {"name": "next", "type": ["null", "ns.long"]}]},
{
"type": "record",
"name": "ns.long",
"fields": [
{"name": "value", "type": "int"},
{"name": "next", "type": ["null", "ns.long"]},
],
},
{"value": 0, "next": {"value": 1, "next": None}},
),
# Optional logical types.
Expand Down Expand Up @@ -278,7 +292,11 @@ def write_datum(datum: object, writers_schema: avro.schema.Schema) -> Tuple[io.B
return writer, encoder, datum_writer


def read_datum(buffer: io.BytesIO, writers_schema: avro.schema.Schema, readers_schema: Optional[avro.schema.Schema] = None) -> object:
def read_datum(
buffer: io.BytesIO,
writers_schema: avro.schema.Schema,
readers_schema: Optional[avro.schema.Schema] = None,
) -> object:
reader = io.BytesIO(buffer.getvalue())
decoder = avro.io.BinaryDecoder(reader)
datum_reader = avro.io.DatumReader(writers_schema, readers_schema)
Expand All @@ -302,7 +320,7 @@ def io_valid(self) -> None:
"""
In these cases, the provided data should be valid with the given schema.
"""
with warnings.catch_warnings(record=True) as actual_warnings:
with warnings.catch_warnings(record=True) as _actual_warnings:
self.assertTrue(
avro.io.validate(self.test_schema, self.test_datum),
f"{self.test_datum} did not validate in the schema {self.test_schema}",
Expand All @@ -326,7 +344,7 @@ def io_round_trip(self) -> None:
"""
A datum should be the same after being encoded and then decoded.
"""
with warnings.catch_warnings(record=True) as actual_warnings:
with warnings.catch_warnings(record=True) as _actual_warnings:
writer, encoder, datum_writer = write_datum(self.test_datum, self.test_schema)
round_trip_datum = read_datum(writer, self.test_schema)
expected: object
Expand Down Expand Up @@ -367,7 +385,7 @@ def __init__(self, skip: bool, test_type: str, test_datum: object, test_hex: byt
warnings.simplefilter("always")

def check_binary_encoding(self) -> None:
with warnings.catch_warnings(record=True) as actual_warnings:
with warnings.catch_warnings(record=True) as _actual_warnings:
writer, encoder, datum_writer = write_datum(self.test_datum, self.writers_schema)
writer.seek(0)
hex_val = avro_hexlify(writer)
Expand All @@ -379,7 +397,7 @@ def check_binary_encoding(self) -> None:

def check_skip_encoding(self) -> None:
VALUE_TO_READ = 6253
with warnings.catch_warnings(record=True) as actual_warnings:
with warnings.catch_warnings(record=True) as _actual_warnings:
# write the value to skip and a known value
writer, encoder, datum_writer = write_datum(self.test_datum, self.writers_schema)
datum_writer.write(VALUE_TO_READ, encoder)
Expand Down Expand Up @@ -418,7 +436,7 @@ def check_schema_promotion(self) -> None:
# note that checking writers_schema.type in read_data
# allows us to handle promotion correctly
DATUM_TO_WRITE = 219
with warnings.catch_warnings(record=True) as actual_warnings:
with warnings.catch_warnings(record=True) as _actual_warnings:
writer, enc, dw = write_datum(DATUM_TO_WRITE, self.writers_schema)
datum_read = read_datum(writer, self.writers_schema, self.readers_schema)
self.assertEqual(
Expand All @@ -429,7 +447,11 @@ def check_schema_promotion(self) -> None:


class DefaultValueTestCase(unittest.TestCase):
def __init__(self, field_type: Collection[str], default: Union[Dict[str, int], List[int], None, float, str]) -> None:
def __init__(
self,
field_type: Collection[str],
default: Union[Dict[str, int], List[int], None, float, str],
) -> None:
"""Ignore the normal signature for unittest.TestCase because we are generating
many test cases from this one class. This is safe as long as the autoloader
ignores this class. The autoloader will ignore this class as long as it has
Expand All @@ -443,7 +465,7 @@ def __init__(self, field_type: Collection[str], default: Union[Dict[str, int], L

def check_default_value(self) -> None:
datum_read: DefaultValueTestCaseType
with warnings.catch_warnings(record=True) as actual_warnings:
with warnings.catch_warnings(record=True) as _actual_warnings:
datum_to_read = cast(DefaultValueTestCaseType, {"H": self.default})
readers_schema = avro.schema.parse(
json.dumps(
Expand All @@ -461,7 +483,10 @@ def check_default_value(self) -> None:
)
)
writer, _, _ = write_datum(LONG_RECORD_DATUM, LONG_RECORD_SCHEMA)
datum_read_ = cast(DefaultValueTestCaseType, read_datum(writer, LONG_RECORD_SCHEMA, readers_schema))
datum_read_ = cast(
DefaultValueTestCaseType,
read_datum(writer, LONG_RECORD_SCHEMA, readers_schema),
)
datum_read = {"H": cast(bytes, datum_read_["H"]).decode()} if isinstance(datum_read_["H"], bytes) else datum_read_
self.assertEqual(datum_to_read, datum_read)

Expand Down Expand Up @@ -505,7 +530,11 @@ def test_deserialization_fails(self) -> None:
enc_bytes = writer_bio.getvalue()
reader = avro.io.DatumReader(reader_schema)
with io.BytesIO(enc_bytes) as reader_bio:
self.assertRaises(avro.errors.InvalidAvroBinaryEncoding, reader.read, avro.io.BinaryDecoder(reader_bio))
self.assertRaises(
avro.errors.InvalidAvroBinaryEncoding,
reader.read,
avro.io.BinaryDecoder(reader_bio),
)

incompatibleUserRecord = {"name": -10, "age": 21, "location": "Woodford"}
with io.BytesIO() as writer_bio:
Expand All @@ -514,7 +543,11 @@ def test_deserialization_fails(self) -> None:
enc_bytes = writer_bio.getvalue()
reader = avro.io.DatumReader(reader_schema)
with io.BytesIO(enc_bytes) as reader_bio:
self.assertRaises(avro.errors.InvalidAvroBinaryEncoding, reader.read, avro.io.BinaryDecoder(reader_bio))
self.assertRaises(
avro.errors.InvalidAvroBinaryEncoding,
reader.read,
avro.io.BinaryDecoder(reader_bio),
)


class TestMisc(unittest.TestCase):
Expand Down Expand Up @@ -648,18 +681,37 @@ def test_type_exception_int(self) -> None:
assert str(exc.exception) == 'The datum "Bad" provided for "F" is not an example of the schema "int"'

def test_type_exception_long(self) -> None:
writers_schema = avro.schema.parse(json.dumps({"type": "record", "name": "Test", "fields": [{"name": "foo", "type": "long"}]}))
writers_schema = avro.schema.parse(
json.dumps(
{
"type": "record",
"name": "Test",
"fields": [{"name": "foo", "type": "long"}],
}
)
)
datum_to_write = {"foo": 5.0}

with self.assertRaises(avro.errors.AvroTypeException) as exc:
write_datum(datum_to_write, writers_schema)
assert str(exc.exception) == 'The datum "5.0" provided for "foo" is not an example of the schema "long"'

def test_type_exception_record(self) -> None:
writers_schema = avro.schema.parse(json.dumps({"type": "record", "name": "Test", "fields": [{"name": "foo", "type": "long"}]}))
writers_schema = avro.schema.parse(
json.dumps(
{
"type": "record",
"name": "Test",
"fields": [{"name": "foo", "type": "long"}],
}
)
)
datum_to_write = ("foo", 5.0)

with self.assertRaisesRegex(avro.errors.AvroTypeException, r"The datum \".*\" provided for \".*\" is not an example of the schema [\s\S]*"):
with self.assertRaisesRegex(
avro.errors.AvroTypeException,
r"The datum \".*\" provided for \".*\" is not an example of the schema [\s\S]*",
):
write_datum(datum_to_write, writers_schema)


Expand Down
Loading
Loading