All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 3m20s
Redis: per-app DB index (0-5) with shared auth DB 15 for SSO keys; flushdb replaces flushall so deploys don't wipe cross-app auth state. Postgres: drop 13 cross-domain FK constraints (migration v2t0p8q9r0), remove dead ORM relationships, add explicit joins for 4 live ones. Multi-engine sessions (account + federation) ready for per-domain DBs via DATABASE_URL_ACCOUNT / DATABASE_URL_FEDERATION env vars. All URLs initially point to the same appdb — zero behaviour change until split-databases.sh is run to migrate data to per-domain DBs. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
247 lines
9.6 KiB
Python
247 lines
9.6 KiB
Python
"""
|
|
Event processor — polls the ap_activities table and dispatches to registered
|
|
activity handlers.
|
|
|
|
Runs as an asyncio background task within each app process.
|
|
Uses SELECT ... FOR UPDATE SKIP LOCKED for safe concurrent processing.
|
|
|
|
A dedicated asyncpg LISTEN connection wakes the poll loop immediately when
|
|
emit_activity() fires NOTIFY ap_activity_pending, so latency drops from
|
|
~2 seconds (poll interval) to sub-100 ms. The fixed-interval poll remains
|
|
as a safety-net fallback.
|
|
|
|
The LISTEN connection and poll queries target the federation database
|
|
(DATABASE_URL_FEDERATION) since ap_activities lives there.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import traceback
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
import asyncpg
|
|
from sqlalchemy import select, update
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from shared.db.session import get_federation_session, DATABASE_URL_FEDERATION
|
|
from shared.models.federation import APActivity
|
|
from .bus import get_activity_handlers
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
class EventProcessor:
    """Background event processor that polls the ap_activities table."""

    def __init__(
        self,
        *,
        app_name: str | None = None,
        poll_interval: float = 2.0,
        batch_size: int = 10,
        stuck_timeout: float = 300.0,
    ):
        """Configure the processor; nothing runs until start() is called.

        Args:
            app_name: if set, only activities whose origin_app matches are
                processed/recovered by this instance.
            poll_interval: fallback poll period in seconds when no NOTIFY
                arrives.
            batch_size: maximum number of activities claimed per cycle.
            stuck_timeout: seconds before an activity stuck in "processing"
                is reset to "pending".

        Raises:
            ValueError: if poll_interval or stuck_timeout is not positive,
                or batch_size is less than 1.
        """
        # Validate up front: non-positive values would make the poll loop
        # spin hot or make wait_for()/timedelta() behave nonsensically.
        if poll_interval <= 0:
            raise ValueError("poll_interval must be positive")
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        if stuck_timeout <= 0:
            raise ValueError("stuck_timeout must be positive")
        self._app_name = app_name
        self._poll_interval = poll_interval
        self._batch_size = batch_size
        self._stuck_timeout = stuck_timeout  # seconds before "processing" → "pending"
        self._task: asyncio.Task | None = None          # poll loop task
        self._listen_task: asyncio.Task | None = None   # LISTEN watcher task
        self._listen_conn: asyncpg.Connection | None = None  # raw LISTEN connection
        self._wake = asyncio.Event()  # set by NOTIFY to wake the poll loop early
        self._running = False
        self._reap_counter = 0        # poll cycles since last stuck-recovery pass
|
|
|
|
# ------------------------------------------------------------------
|
|
# Lifecycle
|
|
# ------------------------------------------------------------------
|
|
|
|
async def start(self) -> None:
    """Spawn the LISTEN watcher and the poll loop as background tasks.

    Idempotent: calling start() while already running is a no-op.
    """
    if self._task is not None:
        return  # already started
    self._running = True
    # LISTEN watcher first so NOTIFY wake-ups are armed when polling begins.
    self._listen_task = asyncio.create_task(self._listen_for_notify())
    self._task = asyncio.create_task(self._poll_loop())
|
|
|
|
async def stop(self) -> None:
    """Stop the background tasks gracefully and close the LISTEN connection.

    Safe to call when not started; all task/connection attributes are
    reset to None.
    """
    self._running = False
    # Stop the watcher before closing its connection so it cannot race a
    # reconnect against our close.
    await self._cancel_task(self._listen_task)
    self._listen_task = None
    if self._listen_conn is not None and not self._listen_conn.is_closed():
        await self._listen_conn.close()
    self._listen_conn = None
    await self._cancel_task(self._task)
    self._task = None

@staticmethod
async def _cancel_task(task: asyncio.Task | None) -> None:
    """Cancel *task* (if any) and wait for it to finish.

    CancelledError is the expected outcome and is swallowed.
    """
    if task is None:
        return
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass
|
|
|
|
# ------------------------------------------------------------------
|
|
# LISTEN — wake poll loop on NOTIFY
|
|
# ------------------------------------------------------------------
|
|
|
|
async def _listen_for_notify(self) -> None:
    """Hold a raw asyncpg LISTEN connection and reconnect while running.

    Every NOTIFY on ap_activity_pending invokes _on_notify, which wakes
    the poll loop. A 30 s SELECT 1 heartbeat detects dead connections so
    the outer loop can reconnect.
    """
    # asyncpg wants a plain postgres:// DSN, not SQLAlchemy's +asyncpg form.
    dsn = DATABASE_URL_FEDERATION.replace("+asyncpg", "")
    while self._running:
        try:
            conn = await asyncpg.connect(dsn)
            self._listen_conn = conn
            await conn.add_listener("ap_activity_pending", self._on_notify)
            log.info("LISTEN ap_activity_pending active")
            # Heartbeat loop: any failure here falls through to reconnect.
            while self._running:
                await asyncio.sleep(30)
                await conn.execute("SELECT 1")
        except asyncio.CancelledError:
            break
        except Exception:
            log.warning("LISTEN connection lost, reconnecting…", exc_info=True)
            await asyncio.sleep(2)
        finally:
            if self._listen_conn is not None and not self._listen_conn.is_closed():
                await self._listen_conn.close()
            self._listen_conn = None
|
|
|
|
def _on_notify(self, conn, pid, channel, payload) -> None:
    """asyncpg listener callback: nudge the poll loop awake.

    Runs synchronously on the event loop; the signature is dictated by
    asyncpg's add_listener contract.
    """
    self._wake.set()
|
|
|
|
# ------------------------------------------------------------------
|
|
# Poll loop
|
|
# ------------------------------------------------------------------
|
|
|
|
async def _poll_loop(self) -> None:
    """Main loop: drain pending activities, then wait for NOTIFY or timeout.

    The fixed poll interval is only a safety net — a NOTIFY sets
    self._wake and ends the wait early. Roughly every 30 cycles,
    activities stuck in "processing" are recovered. Any unexpected error
    is logged and the loop continues.
    """
    while self._running:
        try:
            # Periodically recover stuck activities (~every 30 cycles)
            self._reap_counter += 1
            if self._reap_counter >= 30:
                self._reap_counter = 0
                await self._recover_stuck()

            # Clear before processing so any NOTIFY that arrives during
            # _process_batch sets the event and we loop immediately.
            self._wake.clear()
            processed = await self._process_batch()
            if processed == 0:
                try:
                    await asyncio.wait_for(
                        self._wake.wait(), timeout=self._poll_interval
                    )
                except asyncio.TimeoutError:
                    pass
            # processed > 0 → loop immediately to drain the queue
        except asyncio.CancelledError:
            break
        except Exception:
            # Use the module logger (not traceback.print_exc) so failures
            # reach the configured logging pipeline like every other error
            # path in this module.
            log.exception("Event processor poll loop iteration failed")
            await asyncio.sleep(self._poll_interval)
|
|
|
|
async def _recover_stuck(self) -> None:
    """Return activities stuck in 'processing' to 'pending'.

    Handles a process that crashed mid-handler; combined with idempotent
    handlers this yields at-least-once delivery. Failures are logged and
    swallowed so the poll loop keeps running.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(seconds=self._stuck_timeout)
    conditions = [
        APActivity.process_state == "processing",
        APActivity.created_at < cutoff,
    ]
    if self._app_name:
        conditions.append(APActivity.origin_app == self._app_name)
    stmt = (
        update(APActivity)
        .where(*conditions)
        .values(process_state="pending")
        .returning(APActivity.id)
    )
    try:
        async with get_federation_session() as session:
            recovered = (await session.execute(stmt)).scalars().all()
            await session.commit()
            if recovered:
                log.warning(
                    "Recovered %d stuck activities: %s",
                    len(recovered), recovered,
                )
    except Exception:
        log.exception("Failed to recover stuck activities")
|
|
|
|
async def _process_batch(self) -> int:
    """Claim and process up to batch_size pending activities.

    Rows are locked with FOR UPDATE SKIP LOCKED so concurrent workers
    never claim the same activity. Returns the number processed.
    """
    async with get_federation_session() as session:
        criteria = [
            APActivity.process_state == "pending",
            APActivity.process_attempts < APActivity.process_max_attempts,
        ]
        if self._app_name:
            criteria.append(APActivity.origin_app == self._app_name)
        query = (
            select(APActivity)
            .where(*criteria)
            .order_by(APActivity.created_at)
            .limit(self._batch_size)
            .with_for_update(skip_locked=True)
        )
        batch = (await session.execute(query)).scalars().all()

        for item in batch:
            await self._process_one(session, item)

        await session.commit()
        return len(batch)
|
|
|
|
async def _process_one(self, session: AsyncSession, activity: APActivity) -> None:
    """Run all registered handlers for a single activity and record state.

    State machine: pending → processing → completed | failed | pending
    (retry). Handler exceptions are caught here; the activity retries
    until process_max_attempts, then is marked failed. Does not commit —
    the caller (_process_batch) commits the batch.
    """
    handlers = get_activity_handlers(activity.activity_type, activity.object_type)

    log.info(
        "Processing activity %s: type=%s object_type=%s visibility=%s actor_profile_id=%s — %d handler(s) found",
        activity.id, activity.activity_type, activity.object_type,
        activity.visibility, activity.actor_profile_id, len(handlers),
    )
    for h in handlers:
        log.info("  handler: %s.%s", h.__module__, h.__qualname__)

    # Claim the row: mark in-flight and count the attempt *before* running
    # handlers so a crash mid-handler is recoverable by _recover_stuck().
    activity.process_state = "processing"
    activity.process_attempts += 1
    await session.flush()

    if not handlers:
        activity.process_state = "completed"
        activity.processed_at = datetime.now(timezone.utc)
        return

    try:
        for handler in handlers:
            log.info("  calling %s.%s …", handler.__module__, handler.__qualname__)
            await handler(activity, session)
            log.info("  done %s.%s", handler.__module__, handler.__qualname__)
        activity.process_state = "completed"
        # Stamp completion time at completion (not when the batch started),
        # and clear any error left over from a previously failed attempt.
        activity.processed_at = datetime.now(timezone.utc)
        activity.process_error = None
    except Exception as exc:
        log.exception("Handler failed for activity %s", activity.id)
        activity.process_error = f"{exc.__class__.__name__}: {exc}"
        if activity.process_attempts >= activity.process_max_attempts:
            activity.process_state = "failed"
            activity.processed_at = datetime.now(timezone.utc)
        else:
            activity.process_state = "pending"  # retry