celery/database.py
gilesb 92d26b2b72 Rename content_hash/output_hash to cid throughout
Refactor to use IPFS CID as the primary content identifier:
- Update database schema: content_hash -> cid, output_hash -> output_cid
- Update all services, routers, and tasks to use cid terminology
- Update HTML templates to display CID instead of hash
- Update cache_manager parameter names
- Update README documentation

This completes the transition to CID-only content addressing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 08:02:44 +00:00


# art-celery/database.py
"""
PostgreSQL database module for Art DAG L1 server.
Provides connection pooling and CRUD operations for cache metadata.
"""
import json as _json
import os
from typing import List, Optional
import asyncpg
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://artdag:artdag@localhost:5432/artdag")
pool: Optional[asyncpg.Pool] = None
SCHEMA_SQL = """
-- Core cache: just the content CID and IPFS CID
-- Physical file storage - shared by all users
CREATE TABLE IF NOT EXISTS cache_items (
cid VARCHAR(64) PRIMARY KEY,
ipfs_cid VARCHAR(128),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Item types: per-user metadata (same item can be recipe AND media, per user)
-- actor_id format: @username@server (ActivityPub style)
CREATE TABLE IF NOT EXISTS item_types (
id SERIAL PRIMARY KEY,
cid VARCHAR(64) REFERENCES cache_items(cid) ON DELETE CASCADE,
actor_id VARCHAR(255) NOT NULL,
type VARCHAR(50) NOT NULL,
path VARCHAR(255),
description TEXT,
source_type VARCHAR(20),
source_url TEXT,
source_note TEXT,
pinned BOOLEAN DEFAULT FALSE,
filename VARCHAR(255),
metadata JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
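    -- NOTE: Postgres treats NULLs as distinct in unique constraints, so an
    -- upsert that leaves path NULL inserts a new row instead of updating
    -- (affects the ON CONFLICT clauses in add_item_type / save_item_metadata).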
UNIQUE(cid, actor_id, type, path)
);
-- Add columns if they don't exist (for existing databases)
DO $$ BEGIN
ALTER TABLE item_types ADD COLUMN IF NOT EXISTS filename VARCHAR(255);
ALTER TABLE item_types ADD COLUMN IF NOT EXISTS metadata JSONB DEFAULT '{}';
EXCEPTION WHEN others THEN NULL;
END $$;
-- Pin reasons: one-to-many from item_types
CREATE TABLE IF NOT EXISTS pin_reasons (
id SERIAL PRIMARY KEY,
item_type_id INTEGER REFERENCES item_types(id) ON DELETE CASCADE,
reason VARCHAR(100) NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- L2 shares: per-user shares (includes content_type for role when shared)
CREATE TABLE IF NOT EXISTS l2_shares (
id SERIAL PRIMARY KEY,
cid VARCHAR(64) REFERENCES cache_items(cid) ON DELETE CASCADE,
actor_id VARCHAR(255) NOT NULL,
l2_server VARCHAR(255) NOT NULL,
asset_name VARCHAR(255) NOT NULL,
activity_id VARCHAR(128),
content_type VARCHAR(50) NOT NULL,
published_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
last_synced_at TIMESTAMP WITH TIME ZONE,
UNIQUE(cid, actor_id, l2_server, content_type)
);
-- Add activity_id column if it doesn't exist (for existing databases)
DO $$ BEGIN
ALTER TABLE l2_shares ADD COLUMN IF NOT EXISTS activity_id VARCHAR(128);
EXCEPTION WHEN others THEN NULL;
END $$;
-- Run cache: maps content-addressable run_id to output
-- run_id is a hash of (sorted inputs + recipe), making runs deterministic
CREATE TABLE IF NOT EXISTS run_cache (
run_id VARCHAR(64) PRIMARY KEY,
output_cid VARCHAR(64) NOT NULL,
ipfs_cid VARCHAR(128),
provenance_cid VARCHAR(128),
recipe VARCHAR(255) NOT NULL,
inputs JSONB NOT NULL,
actor_id VARCHAR(255),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Pending/running runs: tracks in-progress work for durability
-- Allows runs to survive restarts and be recovered
CREATE TABLE IF NOT EXISTS pending_runs (
run_id VARCHAR(64) PRIMARY KEY,
celery_task_id VARCHAR(128),
status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending, running, failed
recipe VARCHAR(255) NOT NULL,
inputs JSONB NOT NULL,
dag_json TEXT,
output_name VARCHAR(255),
actor_id VARCHAR(255),
error TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_pending_runs_status ON pending_runs(status);
CREATE INDEX IF NOT EXISTS idx_pending_runs_actor ON pending_runs(actor_id);
-- User storage backends (synced from L2 or configured locally)
CREATE TABLE IF NOT EXISTS storage_backends (
id SERIAL PRIMARY KEY,
actor_id VARCHAR(255) NOT NULL,
provider_type VARCHAR(50) NOT NULL, -- 'pinata', 'web3storage', 'nftstorage', 'infura', 'filebase', 'storj', 'local'
provider_name VARCHAR(255),
description TEXT,
config JSONB NOT NULL DEFAULT '{}',
capacity_gb INTEGER NOT NULL,
used_bytes BIGINT DEFAULT 0,
is_active BOOLEAN DEFAULT true,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
synced_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Storage pins tracking (what's pinned where)
CREATE TABLE IF NOT EXISTS storage_pins (
id SERIAL PRIMARY KEY,
cid VARCHAR(64) NOT NULL,
storage_id INTEGER NOT NULL REFERENCES storage_backends(id) ON DELETE CASCADE,
ipfs_cid VARCHAR(128),
pin_type VARCHAR(20) NOT NULL, -- 'user_content', 'donated', 'system'
size_bytes BIGINT,
pinned_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
UNIQUE(cid, storage_id)
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_item_types_cid ON item_types(cid);
CREATE INDEX IF NOT EXISTS idx_item_types_actor_id ON item_types(actor_id);
CREATE INDEX IF NOT EXISTS idx_item_types_type ON item_types(type);
CREATE INDEX IF NOT EXISTS idx_item_types_path ON item_types(path);
CREATE INDEX IF NOT EXISTS idx_pin_reasons_item_type ON pin_reasons(item_type_id);
CREATE INDEX IF NOT EXISTS idx_l2_shares_cid ON l2_shares(cid);
CREATE INDEX IF NOT EXISTS idx_l2_shares_actor_id ON l2_shares(actor_id);
CREATE INDEX IF NOT EXISTS idx_run_cache_output ON run_cache(output_cid);
CREATE INDEX IF NOT EXISTS idx_storage_backends_actor ON storage_backends(actor_id);
CREATE INDEX IF NOT EXISTS idx_storage_backends_type ON storage_backends(provider_type);
CREATE INDEX IF NOT EXISTS idx_storage_pins_hash ON storage_pins(cid);
CREATE INDEX IF NOT EXISTS idx_storage_pins_storage ON storage_pins(storage_id);
"""
async def init_db():
"""Initialize database connection pool and create schema."""
global pool
pool = await asyncpg.create_pool(DATABASE_URL)
async with pool.acquire() as conn:
await conn.execute(SCHEMA_SQL)
async def close_db():
"""Close database connection pool."""
global pool
if pool:
await pool.close()
pool = None
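# Illustrative usage sketch (not called by the app): the typical lifecycle,
# e.g. from a FastAPI lifespan hook or a worker entrypoint.
async def _example_lifecycle() -> None:
    await init_db()  # creates the pool and applies SCHEMA_SQL idempotently
    try:
        item = await create_cache_item("a" * 64)  # placeholder 64-char cid
        assert item["cid"] == "a" * 64
    finally:
        await close_db()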
# ============ Cache Items ============
async def create_cache_item(cid: str, ipfs_cid: Optional[str] = None) -> dict:
"""Create a cache item. Returns the created item."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO cache_items (cid, ipfs_cid)
VALUES ($1, $2)
ON CONFLICT (cid) DO UPDATE SET ipfs_cid = COALESCE($2, cache_items.ipfs_cid)
RETURNING cid, ipfs_cid, created_at
""",
cid, ipfs_cid
)
return dict(row)
async def get_cache_item(cid: str) -> Optional[dict]:
"""Get a cache item by content hash."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"SELECT cid, ipfs_cid, created_at FROM cache_items WHERE cid = $1",
cid
)
return dict(row) if row else None
async def update_cache_item_ipfs_cid(cid: str, ipfs_cid: str) -> bool:
"""Update the IPFS CID for a cache item."""
async with pool.acquire() as conn:
result = await conn.execute(
"UPDATE cache_items SET ipfs_cid = $2 WHERE cid = $1",
cid, ipfs_cid
)
return result == "UPDATE 1"
async def delete_cache_item(cid: str) -> bool:
"""Delete a cache item and all associated data (cascades)."""
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM cache_items WHERE cid = $1",
cid
)
return result == "DELETE 1"
async def list_cache_items(limit: int = 100, offset: int = 0) -> List[dict]:
"""List cache items with pagination."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT cid, ipfs_cid, created_at
FROM cache_items
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
""",
limit, offset
)
return [dict(row) for row in rows]
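# Illustrative sketch of the upsert semantics above: re-creating an existing
# cid never clears a stored ipfs_cid, because the ON CONFLICT clause keeps
# the old value via COALESCE. CID values here are placeholders.
async def _example_cache_item_upsert(cid: str) -> None:
    await create_cache_item(cid, ipfs_cid="bafyexamplecid")
    item = await create_cache_item(cid, ipfs_cid=None)  # ipfs_cid unchanged
    assert item["ipfs_cid"] == "bafyexamplecid"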
# ============ Item Types ============
async def add_item_type(
cid: str,
actor_id: str,
item_type: str,
path: Optional[str] = None,
description: Optional[str] = None,
source_type: Optional[str] = None,
source_url: Optional[str] = None,
source_note: Optional[str] = None,
) -> dict:
"""Add a type to a cache item for a user. Creates cache_item if needed."""
async with pool.acquire() as conn:
# Ensure cache_item exists
await conn.execute(
"INSERT INTO cache_items (cid) VALUES ($1) ON CONFLICT DO NOTHING",
cid
)
# Insert or update item_type
row = await conn.fetchrow(
"""
INSERT INTO item_types (cid, actor_id, type, path, description, source_type, source_url, source_note)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (cid, actor_id, type, path) DO UPDATE SET
description = COALESCE($5, item_types.description),
source_type = COALESCE($6, item_types.source_type),
source_url = COALESCE($7, item_types.source_url),
source_note = COALESCE($8, item_types.source_note)
RETURNING id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
""",
cid, actor_id, item_type, path, description, source_type, source_url, source_note
)
return dict(row)
async def get_item_types(cid: str, actor_id: Optional[str] = None) -> List[dict]:
"""Get types for a cache item, optionally filtered by user."""
async with pool.acquire() as conn:
if actor_id:
rows = await conn.fetch(
"""
SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types
WHERE cid = $1 AND actor_id = $2
ORDER BY created_at
""",
cid, actor_id
)
else:
rows = await conn.fetch(
"""
SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types
WHERE cid = $1
ORDER BY created_at
""",
cid
)
return [dict(row) for row in rows]
async def get_item_type(cid: str, actor_id: str, item_type: str, path: Optional[str] = None) -> Optional[dict]:
"""Get a specific type for a cache item and user."""
async with pool.acquire() as conn:
if path is None:
row = await conn.fetchrow(
"""
SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types
WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
""",
cid, actor_id, item_type
)
else:
row = await conn.fetchrow(
"""
SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types
WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path = $4
""",
cid, actor_id, item_type, path
)
return dict(row) if row else None
async def update_item_type(
item_type_id: int,
description: Optional[str] = None,
source_type: Optional[str] = None,
source_url: Optional[str] = None,
source_note: Optional[str] = None,
) -> bool:
"""Update an item type's metadata."""
async with pool.acquire() as conn:
result = await conn.execute(
"""
UPDATE item_types SET
description = COALESCE($2, description),
source_type = COALESCE($3, source_type),
source_url = COALESCE($4, source_url),
source_note = COALESCE($5, source_note)
WHERE id = $1
""",
item_type_id, description, source_type, source_url, source_note
)
return result == "UPDATE 1"
async def delete_item_type(cid: str, actor_id: str, item_type: str, path: Optional[str] = None) -> bool:
"""Delete a specific type from a cache item for a user."""
async with pool.acquire() as conn:
if path is None:
result = await conn.execute(
"DELETE FROM item_types WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL",
cid, actor_id, item_type
)
else:
result = await conn.execute(
"DELETE FROM item_types WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path = $4",
cid, actor_id, item_type, path
)
return result == "DELETE 1"
async def list_items_by_type(item_type: str, actor_id: Optional[str] = None, limit: int = 100, offset: int = 0) -> List[dict]:
"""List items of a specific type, optionally filtered by user."""
async with pool.acquire() as conn:
if actor_id:
rows = await conn.fetch(
"""
SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1 AND it.actor_id = $2
ORDER BY it.created_at DESC
LIMIT $3 OFFSET $4
""",
item_type, actor_id, limit, offset
)
else:
rows = await conn.fetch(
"""
SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1
ORDER BY it.created_at DESC
LIMIT $2 OFFSET $3
""",
item_type, limit, offset
)
return [dict(row) for row in rows]
async def get_item_by_path(item_type: str, path: str, actor_id: Optional[str] = None) -> Optional[dict]:
"""Get an item by its type and path (e.g., recipe:/effects/dog), optionally for a specific user."""
async with pool.acquire() as conn:
if actor_id:
row = await conn.fetchrow(
"""
SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1 AND it.path = $2 AND it.actor_id = $3
""",
item_type, path, actor_id
)
else:
row = await conn.fetchrow(
"""
SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1 AND it.path = $2
""",
item_type, path
)
return dict(row) if row else None
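# Illustrative sketch of type+path addressing (the docstring above uses
# "recipe:/effects/dog"): register a recipe under a path for a user, then
# resolve it back to its cid. The actor id is a hypothetical value in the
# @username@server format from the schema comment.
async def _example_path_lookup(cid: str) -> None:
    actor = "@alice@example.org"
    await add_item_type(cid, actor, "recipe", path="/effects/dog")
    found = await get_item_by_path("recipe", "/effects/dog", actor_id=actor)
    assert found is not None and found["cid"] == cid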
# ============ Pinning ============
async def pin_item_type(item_type_id: int, reason: str) -> bool:
"""Pin an item type with a reason."""
async with pool.acquire() as conn:
async with conn.transaction():
# Set pinned flag
await conn.execute(
"UPDATE item_types SET pinned = TRUE WHERE id = $1",
item_type_id
)
# Add pin reason
await conn.execute(
"INSERT INTO pin_reasons (item_type_id, reason) VALUES ($1, $2)",
item_type_id, reason
)
return True
async def unpin_item_type(item_type_id: int, reason: Optional[str] = None) -> bool:
"""Remove a pin reason from an item type. If no reasons left, unpins the item."""
async with pool.acquire() as conn:
async with conn.transaction():
if reason:
# Remove specific reason
await conn.execute(
"DELETE FROM pin_reasons WHERE item_type_id = $1 AND reason = $2",
item_type_id, reason
)
else:
# Remove all reasons
await conn.execute(
"DELETE FROM pin_reasons WHERE item_type_id = $1",
item_type_id
)
# Check if any reasons remain
count = await conn.fetchval(
"SELECT COUNT(*) FROM pin_reasons WHERE item_type_id = $1",
item_type_id
)
if count == 0:
await conn.execute(
"UPDATE item_types SET pinned = FALSE WHERE id = $1",
item_type_id
)
return True
async def get_pin_reasons(item_type_id: int) -> List[dict]:
"""Get all pin reasons for an item type."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"SELECT id, reason, created_at FROM pin_reasons WHERE item_type_id = $1 ORDER BY created_at",
item_type_id
)
return [dict(row) for row in rows]
async def is_item_pinned(cid: str, item_type: Optional[str] = None) -> tuple[bool, List[str]]:
"""Check if any type of a cache item is pinned. Returns (is_pinned, reasons)."""
async with pool.acquire() as conn:
if item_type:
rows = await conn.fetch(
"""
SELECT pr.reason
FROM pin_reasons pr
JOIN item_types it ON pr.item_type_id = it.id
WHERE it.cid = $1 AND it.type = $2 AND it.pinned = TRUE
""",
cid, item_type
)
else:
rows = await conn.fetch(
"""
SELECT pr.reason
FROM pin_reasons pr
JOIN item_types it ON pr.item_type_id = it.id
WHERE it.cid = $1 AND it.pinned = TRUE
""",
cid
)
reasons = [row["reason"] for row in rows]
return len(reasons) > 0, reasons
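# Illustrative sketch of the reason-counted pin lifecycle above: an item
# type stays pinned until its last pin reason is removed. The reason
# strings here are hypothetical.
async def _example_pin_lifecycle(item_type_id: int) -> None:
    await pin_item_type(item_type_id, "user_favorite")
    await pin_item_type(item_type_id, "l2_shared")
    await unpin_item_type(item_type_id, "user_favorite")
    remaining = await get_pin_reasons(item_type_id)
    assert [r["reason"] for r in remaining] == ["l2_shared"]  # still pinned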
# ============ L2 Shares ============
async def add_l2_share(
cid: str,
actor_id: str,
l2_server: str,
asset_name: str,
content_type: str,
) -> dict:
"""Add or update an L2 share for a user."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO l2_shares (cid, actor_id, l2_server, asset_name, content_type, last_synced_at)
VALUES ($1, $2, $3, $4, $5, NOW())
ON CONFLICT (cid, actor_id, l2_server, content_type) DO UPDATE SET
asset_name = $4,
last_synced_at = NOW()
RETURNING id, cid, actor_id, l2_server, asset_name, content_type, published_at, last_synced_at
""",
cid, actor_id, l2_server, asset_name, content_type
)
return dict(row)
async def get_l2_shares(cid: str, actor_id: Optional[str] = None) -> List[dict]:
"""Get L2 shares for a cache item, optionally filtered by user."""
async with pool.acquire() as conn:
if actor_id:
rows = await conn.fetch(
"""
SELECT id, cid, actor_id, l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares
WHERE cid = $1 AND actor_id = $2
ORDER BY published_at
""",
cid, actor_id
)
else:
rows = await conn.fetch(
"""
SELECT id, cid, actor_id, l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares
WHERE cid = $1
ORDER BY published_at
""",
cid
)
return [dict(row) for row in rows]
async def delete_l2_share(cid: str, actor_id: str, l2_server: str, content_type: str) -> bool:
"""Delete an L2 share for a user."""
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM l2_shares WHERE cid = $1 AND actor_id = $2 AND l2_server = $3 AND content_type = $4",
cid, actor_id, l2_server, content_type
)
return result == "DELETE 1"
# ============ Cache Item Cleanup ============
async def has_remaining_references(cid: str) -> bool:
"""Check if a cache item has any remaining item_types or l2_shares."""
async with pool.acquire() as conn:
item_types_count = await conn.fetchval(
"SELECT COUNT(*) FROM item_types WHERE cid = $1",
cid
)
if item_types_count > 0:
return True
l2_shares_count = await conn.fetchval(
"SELECT COUNT(*) FROM l2_shares WHERE cid = $1",
cid
)
return l2_shares_count > 0
async def cleanup_orphaned_cache_item(cid: str) -> bool:
"""Delete a cache item if it has no remaining references. Returns True if deleted."""
async with pool.acquire() as conn:
# Only delete if no item_types or l2_shares reference it
result = await conn.execute(
"""
DELETE FROM cache_items
WHERE cid = $1
AND NOT EXISTS (SELECT 1 FROM item_types WHERE cid = $1)
AND NOT EXISTS (SELECT 1 FROM l2_shares WHERE cid = $1)
""",
cid
)
return result == "DELETE 1"
# ============ High-Level Metadata Functions ============
# These provide a compatible interface to the old JSON-based save_cache_meta/load_cache_meta
async def save_item_metadata(
cid: str,
actor_id: str,
item_type: str = "media",
filename: Optional[str] = None,
description: Optional[str] = None,
source_type: Optional[str] = None,
source_url: Optional[str] = None,
source_note: Optional[str] = None,
pinned: bool = False,
pin_reason: Optional[str] = None,
tags: Optional[List[str]] = None,
folder: Optional[str] = None,
collections: Optional[List[str]] = None,
**extra_metadata
) -> dict:
"""
Save or update item metadata in the database.
Returns a dict with the item metadata (compatible with old JSON format).
"""
# Build metadata JSONB for extra fields
metadata = {}
if tags:
metadata["tags"] = tags
if folder:
metadata["folder"] = folder
if collections:
metadata["collections"] = collections
metadata.update(extra_metadata)
async with pool.acquire() as conn:
# Ensure cache_item exists
await conn.execute(
"INSERT INTO cache_items (cid) VALUES ($1) ON CONFLICT DO NOTHING",
cid
)
# Upsert item_type
row = await conn.fetchrow(
"""
INSERT INTO item_types (cid, actor_id, type, description, source_type, source_url, source_note, pinned, filename, metadata)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
ON CONFLICT (cid, actor_id, type, path) DO UPDATE SET
description = COALESCE(EXCLUDED.description, item_types.description),
source_type = COALESCE(EXCLUDED.source_type, item_types.source_type),
source_url = COALESCE(EXCLUDED.source_url, item_types.source_url),
source_note = COALESCE(EXCLUDED.source_note, item_types.source_note),
pinned = EXCLUDED.pinned,
filename = COALESCE(EXCLUDED.filename, item_types.filename),
metadata = item_types.metadata || EXCLUDED.metadata
RETURNING id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at
""",
cid, actor_id, item_type, description, source_type, source_url, source_note, pinned, filename, _json.dumps(metadata)
)
item_type_id = row["id"]
# Handle pinning
if pinned and pin_reason:
            # Add pin reason if not already present (pin_reasons has no
            # unique constraint, so ON CONFLICT cannot deduplicate here)
            await conn.execute(
                """
                INSERT INTO pin_reasons (item_type_id, reason)
                SELECT $1, $2
                WHERE NOT EXISTS (
                    SELECT 1 FROM pin_reasons
                    WHERE item_type_id = $1 AND reason = $2
                )
                """,
                item_type_id, pin_reason
            )
# Build response dict (compatible with old format)
result = {
"uploader": actor_id,
"uploaded_at": row["created_at"].isoformat() if row["created_at"] else None,
"filename": row["filename"],
"type": row["type"],
"description": row["description"],
"pinned": row["pinned"],
}
# Add origin if present
if row["source_type"] or row["source_url"] or row["source_note"]:
result["origin"] = {
"type": row["source_type"],
"url": row["source_url"],
"note": row["source_note"]
}
# Add metadata fields
if row["metadata"]:
meta = row["metadata"] if isinstance(row["metadata"], dict) else _json.loads(row["metadata"])
if meta.get("tags"):
result["tags"] = meta["tags"]
if meta.get("folder"):
result["folder"] = meta["folder"]
if meta.get("collections"):
result["collections"] = meta["collections"]
# Get pin reasons
if row["pinned"]:
reasons = await conn.fetch(
"SELECT reason FROM pin_reasons WHERE item_type_id = $1",
item_type_id
)
if reasons:
result["pin_reason"] = reasons[0]["reason"]
return result
async def load_item_metadata(cid: str, actor_id: Optional[str] = None) -> dict:
"""
Load item metadata from the database.
If actor_id is provided, returns metadata for that user's view of the item.
Otherwise, returns combined metadata from all users (for backwards compat).
Returns a dict compatible with old JSON format.
"""
async with pool.acquire() as conn:
# Get cache item
cache_item = await conn.fetchrow(
"SELECT cid, ipfs_cid, created_at FROM cache_items WHERE cid = $1",
cid
)
if not cache_item:
return {}
# Get item types
if actor_id:
item_types = await conn.fetch(
"""
SELECT id, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at
FROM item_types WHERE cid = $1 AND actor_id = $2
ORDER BY created_at
""",
cid, actor_id
)
else:
item_types = await conn.fetch(
"""
SELECT id, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at
FROM item_types WHERE cid = $1
ORDER BY created_at
""",
cid
)
if not item_types:
return {"uploaded_at": cache_item["created_at"].isoformat() if cache_item["created_at"] else None}
# Use first item type as primary (for backwards compat)
primary = item_types[0]
result = {
"uploader": primary["actor_id"],
"uploaded_at": primary["created_at"].isoformat() if primary["created_at"] else None,
"filename": primary["filename"],
"type": primary["type"],
"description": primary["description"],
"pinned": any(it["pinned"] for it in item_types),
}
# Add origin if present
if primary["source_type"] or primary["source_url"] or primary["source_note"]:
result["origin"] = {
"type": primary["source_type"],
"url": primary["source_url"],
"note": primary["source_note"]
}
# Add metadata fields
if primary["metadata"]:
meta = primary["metadata"] if isinstance(primary["metadata"], dict) else _json.loads(primary["metadata"])
if meta.get("tags"):
result["tags"] = meta["tags"]
if meta.get("folder"):
result["folder"] = meta["folder"]
if meta.get("collections"):
result["collections"] = meta["collections"]
# Get pin reasons for pinned items
for it in item_types:
if it["pinned"]:
reasons = await conn.fetch(
"SELECT reason FROM pin_reasons WHERE item_type_id = $1",
it["id"]
)
if reasons:
result["pin_reason"] = reasons[0]["reason"]
break
# Get L2 shares
if actor_id:
shares = await conn.fetch(
"""
SELECT l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares WHERE cid = $1 AND actor_id = $2
""",
cid, actor_id
)
else:
shares = await conn.fetch(
"""
SELECT l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares WHERE cid = $1
""",
cid
)
if shares:
result["l2_shares"] = [
{
"l2_server": s["l2_server"],
"asset_name": s["asset_name"],
"activity_id": s["activity_id"],
"content_type": s["content_type"],
"published_at": s["published_at"].isoformat() if s["published_at"] else None,
"last_synced_at": s["last_synced_at"].isoformat() if s["last_synced_at"] else None,
}
for s in shares
]
# For backwards compat, also set "published" if shared
result["published"] = {
"to_l2": True,
"asset_name": shares[0]["asset_name"],
"activity_id": shares[0]["activity_id"],
"l2_server": shares[0]["l2_server"],
}
return result
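# Illustrative round trip through the compatibility layer above, assuming an
# initialized pool; the actor id, filename, and tag are hypothetical.
async def _example_metadata_round_trip(cid: str) -> None:
    actor = "@alice@example.org"
    await save_item_metadata(cid, actor, "media", filename="dog.png", tags=["pets"])
    meta = await load_item_metadata(cid, actor)
    assert meta["filename"] == "dog.png" and meta["tags"] == ["pets"]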
async def update_item_metadata(
cid: str,
actor_id: str,
item_type: str = "media",
**updates
) -> dict:
"""
Update specific fields of item metadata.
Returns updated metadata dict.
"""
# Extract known fields from updates
description = updates.pop("description", None)
source_type = updates.pop("source_type", None)
source_url = updates.pop("source_url", None)
source_note = updates.pop("source_note", None)
# Handle origin dict format
origin = updates.pop("origin", None)
if origin:
source_type = origin.get("type", source_type)
source_url = origin.get("url", source_url)
source_note = origin.get("note", source_note)
pinned = updates.pop("pinned", None)
pin_reason = updates.pop("pin_reason", None)
filename = updates.pop("filename", None)
tags = updates.pop("tags", None)
folder = updates.pop("folder", None)
collections = updates.pop("collections", None)
async with pool.acquire() as conn:
# Get existing item_type
existing = await conn.fetchrow(
"""
SELECT id, metadata FROM item_types
WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
""",
cid, actor_id, item_type
)
if not existing:
# Create new entry
return await save_item_metadata(
cid, actor_id, item_type,
filename=filename, description=description,
source_type=source_type, source_url=source_url, source_note=source_note,
pinned=pinned or False, pin_reason=pin_reason,
tags=tags, folder=folder, collections=collections,
**updates
)
# Build update query dynamically
set_parts = []
params = [cid, actor_id, item_type]
param_idx = 4
if description is not None:
set_parts.append(f"description = ${param_idx}")
params.append(description)
param_idx += 1
if source_type is not None:
set_parts.append(f"source_type = ${param_idx}")
params.append(source_type)
param_idx += 1
if source_url is not None:
set_parts.append(f"source_url = ${param_idx}")
params.append(source_url)
param_idx += 1
if source_note is not None:
set_parts.append(f"source_note = ${param_idx}")
params.append(source_note)
param_idx += 1
if pinned is not None:
set_parts.append(f"pinned = ${param_idx}")
params.append(pinned)
param_idx += 1
if filename is not None:
set_parts.append(f"filename = ${param_idx}")
params.append(filename)
param_idx += 1
# Handle metadata updates
current_metadata = existing["metadata"] if isinstance(existing["metadata"], dict) else (_json.loads(existing["metadata"]) if existing["metadata"] else {})
if tags is not None:
current_metadata["tags"] = tags
if folder is not None:
current_metadata["folder"] = folder
if collections is not None:
current_metadata["collections"] = collections
current_metadata.update(updates)
if current_metadata:
set_parts.append(f"metadata = ${param_idx}")
params.append(_json.dumps(current_metadata))
param_idx += 1
if set_parts:
query = f"""
UPDATE item_types SET {', '.join(set_parts)}
WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
"""
await conn.execute(query, *params)
# Handle pin reason
if pinned and pin_reason:
            await conn.execute(
                """
                INSERT INTO pin_reasons (item_type_id, reason)
                SELECT $1, $2
                WHERE NOT EXISTS (
                    SELECT 1 FROM pin_reasons
                    WHERE item_type_id = $1 AND reason = $2
                )
                """,
                existing["id"], pin_reason
            )
return await load_item_metadata(cid, actor_id)
async def save_l2_share(
cid: str,
actor_id: str,
l2_server: str,
asset_name: str,
content_type: str = "media",
activity_id: Optional[str] = None
) -> dict:
"""Save an L2 share and return share info."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO l2_shares (cid, actor_id, l2_server, asset_name, activity_id, content_type, last_synced_at)
VALUES ($1, $2, $3, $4, $5, $6, NOW())
ON CONFLICT (cid, actor_id, l2_server, content_type) DO UPDATE SET
asset_name = EXCLUDED.asset_name,
activity_id = COALESCE(EXCLUDED.activity_id, l2_shares.activity_id),
last_synced_at = NOW()
RETURNING l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
""",
cid, actor_id, l2_server, asset_name, activity_id, content_type
)
return {
"l2_server": row["l2_server"],
"asset_name": row["asset_name"],
"activity_id": row["activity_id"],
"content_type": row["content_type"],
"published_at": row["published_at"].isoformat() if row["published_at"] else None,
"last_synced_at": row["last_synced_at"].isoformat() if row["last_synced_at"] else None,
}
async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit: int = 100, offset: int = 0) -> List[dict]:
"""Get all items for a user, optionally filtered by type. Deduplicates by cid."""
async with pool.acquire() as conn:
if item_type:
rows = await conn.fetch(
"""
SELECT * FROM (
SELECT DISTINCT ON (it.cid)
it.cid, it.type, it.description, it.filename, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.actor_id = $1 AND it.type = $2
ORDER BY it.cid, it.created_at DESC
) deduped
ORDER BY created_at DESC
LIMIT $3 OFFSET $4
""",
actor_id, item_type, limit, offset
)
else:
rows = await conn.fetch(
"""
SELECT * FROM (
SELECT DISTINCT ON (it.cid)
it.cid, it.type, it.description, it.filename, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.actor_id = $1
ORDER BY it.cid, it.created_at DESC
) deduped
ORDER BY created_at DESC
LIMIT $2 OFFSET $3
""",
actor_id, limit, offset
)
return [
{
"cid": r["cid"],
"type": r["type"],
"description": r["description"],
"filename": r["filename"],
"pinned": r["pinned"],
"created_at": r["created_at"].isoformat() if r["created_at"] else None,
"ipfs_cid": r["ipfs_cid"],
}
for r in rows
]
async def count_user_items(actor_id: str, item_type: Optional[str] = None) -> int:
"""Count unique items (by cid) for a user."""
async with pool.acquire() as conn:
if item_type:
return await conn.fetchval(
"SELECT COUNT(DISTINCT cid) FROM item_types WHERE actor_id = $1 AND type = $2",
actor_id, item_type
)
else:
return await conn.fetchval(
"SELECT COUNT(DISTINCT cid) FROM item_types WHERE actor_id = $1",
actor_id
)
# ============ Run Cache ============
async def get_run_cache(run_id: str) -> Optional[dict]:
"""Get cached run result by content-addressable run_id."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
SELECT run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
FROM run_cache WHERE run_id = $1
""",
run_id
)
if row:
return {
"run_id": row["run_id"],
"output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"],
"recipe": row["recipe"],
"inputs": row["inputs"],
"actor_id": row["actor_id"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
}
return None
async def save_run_cache(
run_id: str,
output_cid: str,
recipe: str,
inputs: List[str],
ipfs_cid: Optional[str] = None,
provenance_cid: Optional[str] = None,
actor_id: Optional[str] = None,
) -> dict:
"""Save run result to cache. Updates if run_id already exists."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO run_cache (run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id)
VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (run_id) DO UPDATE SET
output_cid = EXCLUDED.output_cid,
ipfs_cid = COALESCE(EXCLUDED.ipfs_cid, run_cache.ipfs_cid),
provenance_cid = COALESCE(EXCLUDED.provenance_cid, run_cache.provenance_cid)
RETURNING run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
""",
run_id, output_cid, ipfs_cid, provenance_cid, recipe, _json.dumps(inputs), actor_id
)
return {
"run_id": row["run_id"],
"output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"],
"recipe": row["recipe"],
"inputs": row["inputs"],
"actor_id": row["actor_id"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
}
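# Illustrative memoization sketch: consult the run cache before doing work
# and record the output afterwards. Deriving run_id is assumed to happen
# upstream (see the schema comment); the output cid here is a placeholder.
async def _example_memoized_run(run_id: str, recipe: str, inputs: List[str]) -> str:
    cached = await get_run_cache(run_id)
    if cached:
        return cached["output_cid"]
    output_cid = "0" * 64  # placeholder for the recipe's real output
    await save_run_cache(run_id, output_cid, recipe, inputs)
    return output_cid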
async def get_run_by_output(output_cid: str) -> Optional[dict]:
"""Get run cache entry by output hash."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
SELECT run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
FROM run_cache WHERE output_cid = $1
""",
output_cid
)
if row:
return {
"run_id": row["run_id"],
"output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"],
"recipe": row["recipe"],
"inputs": row["inputs"],
"actor_id": row["actor_id"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
}
return None
def _parse_inputs(inputs_value):
"""Parse inputs from database - may be JSON string, list, or None."""
if inputs_value is None:
return []
if isinstance(inputs_value, list):
return inputs_value
if isinstance(inputs_value, str):
try:
parsed = _json.loads(inputs_value)
if isinstance(parsed, list):
return parsed
return []
except (_json.JSONDecodeError, TypeError):
return []
return []
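# For example (asyncpg may hand JSONB back either decoded or as raw text,
# depending on codec configuration):
#   _parse_inputs(None)          -> []
#   _parse_inputs(["a", "b"])    -> ["a", "b"]
#   _parse_inputs('["a", "b"]')  -> ["a", "b"]
#   _parse_inputs("not json")    -> []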
async def list_runs_by_actor(actor_id: str, offset: int = 0, limit: int = 20) -> List[dict]:
"""List completed runs for a user, ordered by creation time (newest first)."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
FROM run_cache
WHERE actor_id = $1
ORDER BY created_at DESC
LIMIT $2 OFFSET $3
""",
actor_id, limit, offset
)
return [
{
"run_id": row["run_id"],
"output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"],
"recipe": row["recipe"],
"inputs": _parse_inputs(row["inputs"]),
"actor_id": row["actor_id"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"status": "completed",
}
for row in rows
]
# ============ Storage Backends ============
async def get_user_storage(actor_id: str) -> List[dict]:
"""Get all storage backends for a user."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""SELECT id, actor_id, provider_type, provider_name, description, config,
capacity_gb, used_bytes, is_active, created_at, synced_at
FROM storage_backends WHERE actor_id = $1
ORDER BY provider_type, created_at""",
actor_id
)
return [dict(row) for row in rows]
async def get_user_storage_by_type(actor_id: str, provider_type: str) -> List[dict]:
"""Get storage backends of a specific type for a user."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""SELECT id, actor_id, provider_type, provider_name, description, config,
capacity_gb, used_bytes, is_active, created_at, synced_at
FROM storage_backends WHERE actor_id = $1 AND provider_type = $2
ORDER BY created_at""",
actor_id, provider_type
)
return [dict(row) for row in rows]
async def get_storage_by_id(storage_id: int) -> Optional[dict]:
"""Get a storage backend by ID."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""SELECT id, actor_id, provider_type, provider_name, description, config,
capacity_gb, used_bytes, is_active, created_at, synced_at
FROM storage_backends WHERE id = $1""",
storage_id
)
return dict(row) if row else None
async def add_user_storage(
actor_id: str,
provider_type: str,
provider_name: str,
config: dict,
capacity_gb: int,
description: Optional[str] = None
) -> Optional[int]:
"""Add a storage backend for a user. Returns storage ID."""
async with pool.acquire() as conn:
try:
row = await conn.fetchrow(
"""INSERT INTO storage_backends (actor_id, provider_type, provider_name, description, config, capacity_gb)
VALUES ($1, $2, $3, $4, $5, $6)
RETURNING id""",
actor_id, provider_type, provider_name, description, _json.dumps(config), capacity_gb
)
return row["id"] if row else None
except Exception:
return None
async def update_user_storage(
storage_id: int,
provider_name: Optional[str] = None,
description: Optional[str] = None,
config: Optional[dict] = None,
capacity_gb: Optional[int] = None,
is_active: Optional[bool] = None
) -> bool:
"""Update a storage backend."""
updates = []
params = []
param_num = 1
if provider_name is not None:
updates.append(f"provider_name = ${param_num}")
params.append(provider_name)
param_num += 1
if description is not None:
updates.append(f"description = ${param_num}")
params.append(description)
param_num += 1
if config is not None:
updates.append(f"config = ${param_num}")
params.append(_json.dumps(config))
param_num += 1
if capacity_gb is not None:
updates.append(f"capacity_gb = ${param_num}")
params.append(capacity_gb)
param_num += 1
if is_active is not None:
updates.append(f"is_active = ${param_num}")
params.append(is_active)
param_num += 1
if not updates:
return False
updates.append("synced_at = NOW()")
params.append(storage_id)
async with pool.acquire() as conn:
result = await conn.execute(
f"UPDATE storage_backends SET {', '.join(updates)} WHERE id = ${param_num}",
*params
)
return "UPDATE 1" in result
async def remove_user_storage(storage_id: int) -> bool:
"""Remove a storage backend. Cascades to storage_pins."""
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM storage_backends WHERE id = $1",
storage_id
)
return "DELETE 1" in result
async def get_storage_usage(storage_id: int) -> dict:
"""Get storage usage stats."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""SELECT
COUNT(*) as pin_count,
COALESCE(SUM(size_bytes), 0) as used_bytes
FROM storage_pins WHERE storage_id = $1""",
storage_id
)
return {"pin_count": row["pin_count"], "used_bytes": row["used_bytes"]}
async def get_all_active_storage() -> List[dict]:
"""Get all active storage backends (for distributed pinning)."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""SELECT sb.id, sb.actor_id, sb.provider_type, sb.provider_name, sb.description,
sb.config, sb.capacity_gb, sb.is_active, sb.created_at, sb.synced_at,
COALESCE(SUM(sp.size_bytes), 0) as used_bytes,
COUNT(sp.id) as pin_count
FROM storage_backends sb
LEFT JOIN storage_pins sp ON sb.id = sp.storage_id
WHERE sb.is_active = true
GROUP BY sb.id
ORDER BY sb.provider_type, sb.created_at"""
)
return [dict(row) for row in rows]
async def add_storage_pin(
cid: str,
storage_id: int,
ipfs_cid: Optional[str],
pin_type: str,
size_bytes: int
) -> Optional[int]:
"""Add a pin record. Returns pin ID."""
async with pool.acquire() as conn:
try:
row = await conn.fetchrow(
"""INSERT INTO storage_pins (cid, storage_id, ipfs_cid, pin_type, size_bytes)
VALUES ($1, $2, $3, $4, $5)
ON CONFLICT (cid, storage_id) DO UPDATE SET
ipfs_cid = EXCLUDED.ipfs_cid,
pin_type = EXCLUDED.pin_type,
size_bytes = EXCLUDED.size_bytes,
pinned_at = NOW()
RETURNING id""",
cid, storage_id, ipfs_cid, pin_type, size_bytes
)
return row["id"] if row else None
except Exception:
return None
async def remove_storage_pin(cid: str, storage_id: int) -> bool:
"""Remove a pin record."""
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM storage_pins WHERE cid = $1 AND storage_id = $2",
cid, storage_id
)
return "DELETE 1" in result
async def get_pins_for_content(cid: str) -> List[dict]:
"""Get all storage locations where content is pinned."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""SELECT sp.*, sb.provider_type, sb.provider_name, sb.actor_id
FROM storage_pins sp
JOIN storage_backends sb ON sp.storage_id = sb.id
WHERE sp.cid = $1""",
cid
)
return [dict(row) for row in rows]
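# Illustrative sketch of distributed-pinning bookkeeping: record a pin on a
# user's backend, then enumerate everywhere the content is pinned. The
# pin_type value comes from the schema comment; the size is a placeholder.
async def _example_pin_bookkeeping(cid: str, storage_id: int) -> None:
    await add_storage_pin(cid, storage_id, ipfs_cid=None,
                          pin_type="user_content", size_bytes=1024)
    locations = await get_pins_for_content(cid)
    assert any(p["storage_id"] == storage_id for p in locations)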
# ============ Pending Runs ============
async def create_pending_run(
run_id: str,
celery_task_id: str,
recipe: str,
inputs: List[str],
actor_id: str,
dag_json: Optional[str] = None,
output_name: Optional[str] = None,
) -> dict:
"""Create a pending run record for durability."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO pending_runs (run_id, celery_task_id, status, recipe, inputs, dag_json, output_name, actor_id)
VALUES ($1, $2, 'running', $3, $4, $5, $6, $7)
ON CONFLICT (run_id) DO UPDATE SET
celery_task_id = EXCLUDED.celery_task_id,
status = 'running',
updated_at = NOW()
RETURNING run_id, celery_task_id, status, recipe, inputs, dag_json, output_name, actor_id, created_at, updated_at
""",
run_id, celery_task_id, recipe, _json.dumps(inputs), dag_json, output_name, actor_id
)
return {
"run_id": row["run_id"],
"celery_task_id": row["celery_task_id"],
"status": row["status"],
"recipe": row["recipe"],
"inputs": row["inputs"],
"dag_json": row["dag_json"],
"output_name": row["output_name"],
"actor_id": row["actor_id"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
}
async def get_pending_run(run_id: str) -> Optional[dict]:
"""Get a pending run by ID."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
SELECT run_id, celery_task_id, status, recipe, inputs, dag_json, output_name, actor_id, error, created_at, updated_at
FROM pending_runs WHERE run_id = $1
""",
run_id
)
if row:
            # asyncpg returns JSONB as a str unless a codec is registered, so parse defensively
inputs = row["inputs"]
if isinstance(inputs, str):
inputs = _json.loads(inputs)
return {
"run_id": row["run_id"],
"celery_task_id": row["celery_task_id"],
"status": row["status"],
"recipe": row["recipe"],
"inputs": inputs,
"dag_json": row["dag_json"],
"output_name": row["output_name"],
"actor_id": row["actor_id"],
"error": row["error"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
}
return None
async def list_pending_runs(actor_id: Optional[str] = None, status: Optional[str] = None) -> List[dict]:
"""List pending runs, optionally filtered by actor and/or status."""
async with pool.acquire() as conn:
conditions = []
params = []
param_idx = 1
if actor_id:
conditions.append(f"actor_id = ${param_idx}")
params.append(actor_id)
param_idx += 1
if status:
conditions.append(f"status = ${param_idx}")
params.append(status)
param_idx += 1
where_clause = " AND ".join(conditions) if conditions else "TRUE"
rows = await conn.fetch(
f"""
SELECT run_id, celery_task_id, status, recipe, inputs, output_name, actor_id, error, created_at, updated_at
FROM pending_runs
WHERE {where_clause}
ORDER BY created_at DESC
""",
*params
)
results = []
for row in rows:
# Parse inputs if it's a string
inputs = row["inputs"]
if isinstance(inputs, str):
inputs = _json.loads(inputs)
results.append({
"run_id": row["run_id"],
"celery_task_id": row["celery_task_id"],
"status": row["status"],
"recipe": row["recipe"],
"inputs": inputs,
"output_name": row["output_name"],
"actor_id": row["actor_id"],
"error": row["error"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
})
return results
async def update_pending_run_status(run_id: str, status: str, error: Optional[str] = None) -> bool:
"""Update the status of a pending run."""
async with pool.acquire() as conn:
if error:
result = await conn.execute(
"UPDATE pending_runs SET status = $2, error = $3, updated_at = NOW() WHERE run_id = $1",
run_id, status, error
)
else:
result = await conn.execute(
"UPDATE pending_runs SET status = $2, updated_at = NOW() WHERE run_id = $1",
run_id, status
)
return "UPDATE 1" in result
async def complete_pending_run(run_id: str) -> bool:
"""Remove a pending run after it completes (moves to run_cache)."""
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM pending_runs WHERE run_id = $1",
run_id
)
return "DELETE 1" in result
async def get_stale_pending_runs(older_than_hours: int = 24) -> List[dict]:
"""Get pending runs that haven't been updated recently (for recovery)."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT run_id, celery_task_id, status, recipe, inputs, dag_json, output_name, actor_id, created_at, updated_at
FROM pending_runs
WHERE status IN ('pending', 'running')
            AND updated_at < NOW() - make_interval(hours => $1)
ORDER BY created_at
""",
older_than_hours
)
return [
{
"run_id": row["run_id"],
"celery_task_id": row["celery_task_id"],
"status": row["status"],
"recipe": row["recipe"],
"inputs": row["inputs"],
"dag_json": row["dag_json"],
"output_name": row["output_name"],
"actor_id": row["actor_id"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
}
for row in rows
]
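# Illustrative recovery sketch using the durability tables above: find runs
# that stalled (e.g. a worker died), mark them failed, and let callers
# resubmit. The threshold mirrors the default above; the error text is a
# placeholder.
async def _example_recover_stale_runs() -> None:
    for run in await get_stale_pending_runs(older_than_hours=24):
        await update_pending_run_status(
            run["run_id"], "failed", error="stale: worker lost"
        )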