Commit ff6c18a

Merge pull request #63 from PySport/feat/real-domain-events
Redesign event log with proper separation of concerns
2 parents 7b95106 + 8c66121, commit ff6c18a

5 files changed: 170 additions & 206 deletions

ingestify/infra/event_log/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -1,4 +1,5 @@
 from .consumer import EventLogConsumer
+from .event_log import EventLog
 from .subscriber import EventLogSubscriber

-__all__ = ["EventLogConsumer", "EventLogSubscriber"]
+__all__ = ["EventLog", "EventLogConsumer", "EventLogSubscriber"]

ingestify/infra/event_log/consumer.py

Lines changed: 15 additions & 33 deletions
@@ -1,10 +1,10 @@
-import json
 import logging
 import time
 from typing import Callable, Optional

 from sqlalchemy import create_engine, select

+from .event_log import EventLog
 from .tables import get_tables

 logger = logging.getLogger(__name__)
@@ -14,35 +14,34 @@ class EventLogConsumer:
     """Cursor-based consumer for the event_log table.

     Usage (run once, e.g. cron):
-        EventLogConsumer.from_config("ingestify.yaml", reader_name="importer").run(on_event)
+        EventLogConsumer.from_config("ingestify.yaml", reader_name="default").run(on_event)

     Usage (keep running, poll every 5 seconds):
-        EventLogConsumer.from_config("ingestify.yaml", reader_name="importer").run(on_event, poll_interval=5)
+        EventLogConsumer.from_config("ingestify.yaml", reader_name="default").run(on_event, poll_interval=5)

     Exit codes (returned by run):
        0  Batch processed successfully (or nothing new).
        1  A processing error occurred; cursor was NOT advanced.
     """

     def __init__(self, database_url: str, reader_name: str, table_prefix: str = ""):
+        engine = create_engine(database_url)
+        self._event_log = EventLog(engine, table_prefix)
         self._reader_name = reader_name
+        self._engine = engine
         tables = get_tables(table_prefix)
-        self._metadata = tables["metadata"]
-        self._event_log_table = tables["event_log_table"]
         self._reader_state_table = tables["reader_state_table"]
-        self._engine = create_engine(database_url)
-        self._metadata.create_all(self._engine, checkfirst=True)
+        self._reader_state_table.create(engine, checkfirst=True)

     @classmethod
     def from_config(cls, config_file: str, reader_name: str) -> "EventLogConsumer":
         from pyaml_env import parse_config

         config = parse_config(config_file, default_value="")
         main = config["main"]
-        database_url = main["metadata_url"]
         table_prefix = main.get("metadata_options", {}).get("table_prefix", "")
         return cls(
-            database_url=database_url,
+            database_url=main["metadata_url"],
             reader_name=reader_name,
             table_prefix=table_prefix,
         )
@@ -70,18 +69,6 @@ def _get_last_event_id(self, conn) -> int:
         ).fetchone()
         return row[0] if row else 0

-    def _fetch_batch(self, conn, last_event_id: int, batch_size: int) -> list:
-        return conn.execute(
-            select(
-                self._event_log_table.c.id,
-                self._event_log_table.c.event_type,
-                self._event_log_table.c.payload_json,
-            )
-            .where(self._event_log_table.c.id > last_event_id)
-            .order_by(self._event_log_table.c.id)
-            .limit(batch_size)
-        ).fetchall()
-
     def _update_cursor(self, conn, event_id: int) -> None:
         conn.execute(
             self._reader_state_table.update()
@@ -94,27 +81,22 @@ def _run_once(self, on_event: Callable, batch_size: int = 100) -> int:
         with self._engine.connect() as conn:
             self._ensure_reader_state(conn)
             last_id = self._get_last_event_id(conn)
-            rows = self._fetch_batch(conn, last_id, batch_size)

-            if not rows:
-                return 0
+        rows = self._event_log.fetch_batch(last_id, batch_size)
+        if not rows:
+            return 0

-            for event_id, event_type, payload_json in rows:
+        with self._engine.connect() as conn:
+            for event_id, event in rows:
                 try:
-                    payload = (
-                        payload_json
-                        if isinstance(payload_json, dict)
-                        else json.loads(payload_json)
-                    )
-                    on_event(event_type, payload)
+                    on_event(event)
                 except Exception:
                     logger.exception(
                         "Failed to process event id=%d type=%r — cursor NOT advanced",
                         event_id,
-                        event_type,
+                        type(event).event_type,
                     )
                     return 1
-
                 self._update_cursor(conn, event_id)

         return 0
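
With this commit the consumer hands the callback a fully deserialized domain event instead of an (event_type, payload) tuple. A minimal run-once consumer script against the new signature could look like the sketch below; the isinstance dispatch and the print statements are illustrative assumptions, while EventLogConsumer.from_config, run, and the exit-code contract come from the docstring above.

import sys

from ingestify.domain.models.dataset.events import DatasetCreated, RevisionAdded
from ingestify.infra.event_log import EventLogConsumer


def on_event(event):
    # The consumer now passes typed domain events; the affected dataset is
    # reachable via event.dataset and the type name via type(event).event_type.
    if isinstance(event, DatasetCreated):
        print("new dataset:", event.dataset.dataset_id)
    elif isinstance(event, RevisionAdded):
        print("new revision for dataset:", event.dataset.dataset_id)
    else:
        print("event:", type(event).event_type)


if __name__ == "__main__":
    consumer = EventLogConsumer.from_config("ingestify.yaml", reader_name="default")
    # run() returns 0 when the batch was processed (or there was nothing new)
    # and 1 when processing failed and the cursor was not advanced.
    sys.exit(consumer.run(on_event))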
ingestify/infra/event_log/event_log.py

Lines changed: 68 additions & 0 deletions

@@ -0,0 +1,68 @@
+import logging
+
+from sqlalchemy import select
+
+from ingestify.domain.models.dataset.events import (
+    DatasetCreated,
+    MetadataUpdated,
+    RevisionAdded,
+)
+from ingestify.domain.models.event.domain_event import DomainEvent
+from ingestify.utils import utcnow
+
+from .tables import get_tables
+
+logger = logging.getLogger(__name__)
+
+_EVENT_TYPE_MAP = {
+    "dataset_created": DatasetCreated,
+    "revision_added": RevisionAdded,
+    "metadata_updated": MetadataUpdated,
+}
+
+
+class EventLog:
+    def __init__(self, engine, table_prefix: str = ""):
+        tables = get_tables(table_prefix)
+        self._engine = engine
+        self._table = tables["event_log_table"]
+        self._table.create(engine, checkfirst=True)
+
+    def write(self, event: DomainEvent) -> None:
+        with self._engine.connect() as conn:
+            conn.execute(
+                self._table.insert().values(
+                    event_type=type(event).event_type,
+                    payload_json=event.model_dump(mode="json"),
+                    source=event.dataset.provider,
+                    dataset_id=event.dataset.dataset_id,
+                    created_at=utcnow(),
+                )
+            )
+            conn.commit()
+
+    def fetch_batch(self, last_event_id: int, batch_size: int) -> list:
+        """Returns a list of (event_id, domain_event) tuples."""
+        with self._engine.connect() as conn:
+            rows = conn.execute(
+                select(
+                    self._table.c.id,
+                    self._table.c.event_type,
+                    self._table.c.payload_json,
+                )
+                .where(self._table.c.id > last_event_id)
+                .order_by(self._table.c.id)
+                .limit(batch_size)
+            ).fetchall()
+
+        result = []
+        for event_id, event_type, payload_json in rows:
+            event_cls = _EVENT_TYPE_MAP.get(event_type)
+            if event_cls is None:
+                logger.debug(
+                    "Skipping unknown event_type=%r (id=%d)", event_type, event_id
+                )
+                continue
+            result.append((event_id, event_cls.model_validate(payload_json)))
+
+        return result
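
EventLog is now the single place that knows about the event_log table: EventLogSubscriber calls write() and EventLogConsumer calls fetch_batch(). A rough sketch of using it directly, assuming a SQLAlchemy engine pointing at the same database as Ingestify's metadata_url (the sqlite URL below is a placeholder):

from sqlalchemy import create_engine

from ingestify.infra.event_log import EventLog

engine = create_engine("sqlite:///ingestify.db")  # placeholder database URL
event_log = EventLog(engine, table_prefix="")

# fetch_batch returns (event_id, domain_event) tuples; payloads are rebuilt
# through _EVENT_TYPE_MAP and rows with unknown event types are skipped.
for event_id, event in event_log.fetch_batch(last_event_id=0, batch_size=100):
    print(event_id, type(event).event_type, event.dataset.dataset_id)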
ingestify/infra/event_log/subscriber.py

Lines changed: 11 additions & 25 deletions

@@ -1,9 +1,8 @@
 import logging

 from ingestify.domain.models.event import Subscriber
-from ingestify.utils import utcnow

-from .tables import get_tables
+from .event_log import EventLog

 logger = logging.getLogger(__name__)

@@ -21,38 +20,25 @@ class EventLogSubscriber(Subscriber):
     def __init__(self, store):
         super().__init__(store)
         session_provider = store.dataset_repository.session_provider
-        tables = get_tables(session_provider.table_prefix)
-        tables["metadata"].create_all(session_provider.engine, checkfirst=True)
-        self._engine = session_provider.engine
-        self._event_log_table = tables["event_log_table"]
+        self._event_log = EventLog(
+            session_provider.engine, session_provider.table_prefix
+        )

-    def _write(self, event_type: str, dataset) -> None:
+    def _write(self, event) -> None:
         try:
-            with self._engine.connect() as conn:
-                conn.execute(
-                    self._event_log_table.insert().values(
-                        event_type=event_type,
-                        payload_json=dataset.model_dump(
-                            mode="json", exclude={"revisions"}
-                        ),
-                        source=dataset.provider,
-                        dataset_id=dataset.dataset_id,
-                        created_at=utcnow(),
-                    )
-                )
-                conn.commit()
+            self._event_log.write(event)
         except Exception:
             logger.exception(
                 "EventLogSubscriber: failed to write event_type=%r dataset_id=%r",
-                event_type,
-                dataset.dataset_id,
+                type(event).event_type,
+                event.dataset.dataset_id,
             )

     def on_dataset_created(self, event) -> None:
-        self._write(type(event).event_type, event.dataset)
+        self._write(event)

     def on_metadata_updated(self, event) -> None:
-        self._write(type(event).event_type, event.dataset)
+        self._write(event)

     def on_revision_added(self, event) -> None:
-        self._write(type(event).event_type, event.dataset)
+        self._write(event)
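
The subscriber is now a thin adapter: it receives domain events raised by the store and delegates persistence to EventLog instead of talking to SQLAlchemy itself. Other write-side integrations can hang off the same three hooks; the sketch below is hypothetical (the webhook endpoint and the requests dependency are not part of this commit), only the Subscriber base class and the hook names are taken from the code above.

import logging

import requests

from ingestify.domain.models.event import Subscriber

logger = logging.getLogger(__name__)


class WebhookSubscriber(Subscriber):
    """Hypothetical subscriber that forwards dataset events to a webhook."""

    def _forward(self, event) -> None:
        try:
            requests.post(
                "https://example.invalid/ingestify-events",  # hypothetical URL
                json={
                    "event_type": type(event).event_type,
                    "dataset_id": event.dataset.dataset_id,
                },
                timeout=5,
            )
        except Exception:
            logger.exception("WebhookSubscriber: failed to forward event")

    def on_dataset_created(self, event) -> None:
        self._forward(event)

    def on_metadata_updated(self, event) -> None:
        self._forward(event)

    def on_revision_added(self, event) -> None:
        self._forward(event)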
