|
| 1 | +"""Apache Kafka transport. Requires the ``kafka`` extra: |
| 2 | +
|
| 3 | + pip install "babelqueue[kafka]" |
| 4 | +
|
| 5 | +Producing writes the canonical envelope as the record value and projects the contract |
| 6 | +envelope fields onto native Kafka record headers (UTF-8 byte strings): ``bq-job`` = URN, |
| 7 | +``bq-trace-id`` = trace_id, ``bq-message-id`` = meta.id, plus ``bq-schema-version`` / |
| 8 | +``bq-source-lang`` / ``bq-attempts`` — so a Java/.NET/... peer can route on ``bq-job`` |
| 9 | +without parsing the body — with the record timestamp mirroring ``meta.created_at``. |
| 10 | +Consuming is process-then-commit: ``poll`` reserves a record (``enable.auto.commit=false``) |
| 11 | +and ``ack`` commits the offset only after the handler returns (at-least-once). Kafka has no |
| 12 | +native delivery count, so the ``bq-attempts`` header is the authoritative retry counter (the |
| 13 | +body's ``attempts`` is the fallback for non-BabelQueue producers); the runtime owns retry by |
| 14 | +republishing with attempts+1 and dead-letters to ``<queue>.dlq``. |
| 15 | +
|
| 16 | +This implements §6 of the broker-bindings contract. The envelope is unchanged |
| 17 | +(``schema_version`` stays 1); Apache Kafka is purely additive. |
| 18 | +
|
| 19 | +URL form: ``kafka://host:9092[,host2:9092]``. The default consumer group is ``babelqueue``. |
| 20 | +For a custom client, build the transport directly and pass it via ``BabelQueue(transport=...)`` |
| 21 | +or ``KafkaTransport(producer=..., consumer_factory=...)``. |
| 22 | +""" |
| 23 | + |
| 24 | +from __future__ import annotations |
| 25 | + |
| 26 | +from typing import Any, Callable, Dict, List, Optional, Tuple |
| 27 | + |
| 28 | +from .codec import EnvelopeCodec |
| 29 | +from .transport import ReceivedMessage, Transport |
| 30 | + |
| 31 | +Headers = List[Tuple[str, bytes]] |
| 32 | + |
| 33 | + |
| 34 | +def _brokers_from_url(url: str) -> str: |
| 35 | + rest = url.split("://", 1)[1] if "://" in url else url |
| 36 | + return rest.split("/", 1)[0] or "localhost:9092" |
| 37 | + |
| 38 | + |
| 39 | +class KafkaTransport(Transport): |
| 40 | + def __init__( |
| 41 | + self, |
| 42 | + url: str = "kafka://localhost:9092", |
| 43 | + *, |
| 44 | + producer: Any = None, |
| 45 | + consumer_factory: Optional[Callable[[str], Any]] = None, |
| 46 | + group_id: str = "babelqueue", |
| 47 | + **client_config: Any, |
| 48 | + ) -> None: |
| 49 | + self._brokers = _brokers_from_url(url or "kafka://localhost:9092") |
| 50 | + self._group_id = group_id |
| 51 | + self._client_config = client_config |
| 52 | + self._producer = producer |
| 53 | + self._consumer_factory = consumer_factory |
| 54 | + self._consumers: Dict[str, Any] = {} |
| 55 | + |
| 56 | + # -- helpers ------------------------------------------------------------ |
| 57 | + |
| 58 | + def _producer_(self) -> Any: |
| 59 | + if self._producer is None: |
| 60 | + self._producer = self._build_producer() # pragma: no cover - needs Kafka / network |
| 61 | + return self._producer |
| 62 | + |
| 63 | + def _build_producer(self) -> Any: # pragma: no cover - needs Kafka / network |
| 64 | + from confluent_kafka import Producer |
| 65 | + |
| 66 | + return Producer({"bootstrap.servers": self._brokers, **self._client_config}) |
| 67 | + |
| 68 | + def _consumer(self, queue: str) -> Any: |
| 69 | + consumer = self._consumers.get(queue) |
| 70 | + if consumer is None: |
| 71 | + if self._consumer_factory is not None: |
| 72 | + consumer = self._consumer_factory(queue) |
| 73 | + else: |
| 74 | + consumer = self._build_consumer(queue) # pragma: no cover - needs Kafka / network |
| 75 | + self._consumers[queue] = consumer |
| 76 | + return consumer |
| 77 | + |
| 78 | + def _build_consumer(self, queue: str) -> Any: # pragma: no cover - needs Kafka / network |
| 79 | + from confluent_kafka import Consumer |
| 80 | + |
| 81 | + consumer = Consumer( |
| 82 | + { |
| 83 | + "bootstrap.servers": self._brokers, |
| 84 | + "group.id": self._group_id, |
| 85 | + "enable.auto.commit": False, |
| 86 | + "auto.offset.reset": "earliest", |
| 87 | + **self._client_config, |
| 88 | + } |
| 89 | + ) |
| 90 | + consumer.subscribe([queue]) |
| 91 | + return consumer |
| 92 | + |
| 93 | + @staticmethod |
| 94 | + def _projection(body: str) -> Headers: |
| 95 | + """Native Kafka record headers (UTF-8 byte values) — a redundant, routable view of the |
| 96 | + body: bq-job/bq-trace-id/bq-message-id + bq-schema-version/lang/attempts. §6.3.""" |
| 97 | + env = EnvelopeCodec.decode(body) |
| 98 | + if not env: |
| 99 | + return [] |
| 100 | + meta = env.get("meta") or {} |
| 101 | + |
| 102 | + headers: Headers = [] |
| 103 | + |
| 104 | + def add(key: str, value: Any) -> None: |
| 105 | + if value is not None and value != "": |
| 106 | + headers.append((key, str(value).encode("utf-8"))) |
| 107 | + |
| 108 | + add("bq-job", env.get("job")) |
| 109 | + add("bq-trace-id", env.get("trace_id")) |
| 110 | + add("bq-message-id", meta.get("id")) |
| 111 | + if meta.get("schema_version") is not None: |
| 112 | + headers.append(("bq-schema-version", str(meta["schema_version"]).encode("utf-8"))) |
| 113 | + add("bq-source-lang", meta.get("lang")) |
| 114 | + headers.append(("bq-attempts", str(int(env.get("attempts", 0) or 0)).encode("utf-8"))) |
| 115 | + return headers |
| 116 | + |
| 117 | + @staticmethod |
| 118 | + def _reconcile(body: str, headers: Any) -> str: |
| 119 | + """Set attempts to the authoritative bq-attempts header (falling back to the body's own |
| 120 | + attempts when the header is absent/unparseable — a non-BabelQueue producer). §6.5.""" |
| 121 | + env = EnvelopeCodec.decode(body) |
| 122 | + if not env: |
| 123 | + return body |
| 124 | + attempts = int(env.get("attempts", 0) or 0) |
| 125 | + for key, value in headers or []: |
| 126 | + if key == "bq-attempts": |
| 127 | + raw = value.decode("utf-8") if isinstance(value, (bytes, bytearray)) else str(value) |
| 128 | + try: |
| 129 | + attempts = int(raw) |
| 130 | + except (ValueError, TypeError): |
| 131 | + pass |
| 132 | + break |
| 133 | + if attempts == int(env.get("attempts", 0) or 0): |
| 134 | + return body |
| 135 | + env["attempts"] = attempts |
| 136 | + return EnvelopeCodec.encode(env) |
| 137 | + |
| 138 | + @staticmethod |
| 139 | + def _payload(message: Any) -> str: |
| 140 | + value = message.value() |
| 141 | + if isinstance(value, (bytes, bytearray)): |
| 142 | + return value.decode("utf-8") |
| 143 | + return str(value) if value is not None else "" |
| 144 | + |
| 145 | + # -- Transport ---------------------------------------------------------- |
| 146 | + |
| 147 | + def publish(self, queue: str, body: str) -> None: |
| 148 | + env = EnvelopeCodec.decode(body) |
| 149 | + meta = env.get("meta") or {} |
| 150 | + producer = self._producer_() |
| 151 | + kwargs: Dict[str, Any] = {"value": body.encode("utf-8"), "headers": self._projection(body)} |
| 152 | + created_at = meta.get("created_at") |
| 153 | + if created_at: |
| 154 | + kwargs["timestamp"] = int(created_at) |
| 155 | + producer.produce(queue, **kwargs) |
| 156 | + producer.poll(0) |
| 157 | + |
| 158 | + def pop(self, queue: str, timeout: float = 1.0) -> Optional[ReceivedMessage]: |
| 159 | + wait = timeout if timeout and timeout > 0 else 1.0 |
| 160 | + message = self._consumer(queue).poll(wait) |
| 161 | + if message is None or message.error() is not None: |
| 162 | + return None |
| 163 | + body = self._reconcile(self._payload(message), message.headers()) |
| 164 | + return ReceivedMessage(body=body, queue=queue, handle=message) |
| 165 | + |
| 166 | + def ack(self, message: ReceivedMessage) -> None: |
| 167 | + if message.handle is None: |
| 168 | + return |
| 169 | + self._consumer(message.queue).commit(message=message.handle, asynchronous=False) |
| 170 | + |
| 171 | + def close(self) -> None: # pragma: no cover - resource cleanup |
| 172 | + try: |
| 173 | + if self._producer is not None: |
| 174 | + self._producer.flush() |
| 175 | + except Exception: # noqa: BLE001 - best-effort cleanup |
| 176 | + pass |
| 177 | + for consumer in self._consumers.values(): |
| 178 | + try: |
| 179 | + consumer.close() |
| 180 | + except Exception: # noqa: BLE001 - best-effort cleanup |
| 181 | + pass |
0 commit comments