celery/database.py
gilesb 92d26b2b72 Rename content_hash/output_hash to cid throughout
Refactor to use IPFS CID as the primary content identifier:
- Update database schema: content_hash -> cid, output_hash -> output_cid
- Update all services, routers, and tasks to use cid terminology
- Update HTML templates to display CID instead of hash
- Update cache_manager parameter names
- Update README documentation

This completes the transition to CID-only content addressing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 08:02:44 +00:00


# art-celery/database.py
"""
PostgreSQL database module for Art DAG L1 server.
Provides connection pooling and CRUD operations for cache metadata.
"""
import json as _json
import os
from typing import List, Optional
import asyncpg
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://artdag:artdag@localhost:5432/artdag")
pool: Optional[asyncpg.Pool] = None
SCHEMA_SQL = """
-- Core cache: just the content CID and IPFS CID
-- Physical file storage - shared by all users
CREATE TABLE IF NOT EXISTS cache_items (
cid VARCHAR(64) PRIMARY KEY,
ipfs_cid VARCHAR(128),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Item types: per-user metadata (same item can be recipe AND media, per user)
-- actor_id format: @username@server (ActivityPub style)
CREATE TABLE IF NOT EXISTS item_types (
id SERIAL PRIMARY KEY,
cid VARCHAR(64) REFERENCES cache_items(cid) ON DELETE CASCADE,
actor_id VARCHAR(255) NOT NULL,
type VARCHAR(50) NOT NULL,
path VARCHAR(255),
description TEXT,
source_type VARCHAR(20),
source_url TEXT,
source_note TEXT,
pinned BOOLEAN DEFAULT FALSE,
filename VARCHAR(255),
metadata JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
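    -- NOTE: Postgres treats NULLs as distinct in unique constraints, so an
    -- upsert that leaves path NULL inserts a new row instead of updating
    -- (affects the ON CONFLICT clauses in add_item_type / save_item_metadata).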
UNIQUE(cid, actor_id, type, path)
);
-- Add columns if they don't exist (for existing databases)
DO $$ BEGIN
ALTER TABLE item_types ADD COLUMN IF NOT EXISTS filename VARCHAR(255);
ALTER TABLE item_types ADD COLUMN IF NOT EXISTS metadata JSONB DEFAULT '{}';
EXCEPTION WHEN others THEN NULL;
END $$;
-- Pin reasons: one-to-many from item_types
CREATE TABLE IF NOT EXISTS pin_reasons (
id SERIAL PRIMARY KEY,
item_type_id INTEGER REFERENCES item_types(id) ON DELETE CASCADE,
reason VARCHAR(100) NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- L2 shares: per-user shares (includes content_type for role when shared)
CREATE TABLE IF NOT EXISTS l2_shares (
id SERIAL PRIMARY KEY,
cid VARCHAR(64) REFERENCES cache_items(cid) ON DELETE CASCADE,
actor_id VARCHAR(255) NOT NULL,
l2_server VARCHAR(255) NOT NULL,
asset_name VARCHAR(255) NOT NULL,
activity_id VARCHAR(128),
content_type VARCHAR(50) NOT NULL,
published_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
last_synced_at TIMESTAMP WITH TIME ZONE,
UNIQUE(cid, actor_id, l2_server, content_type)
);
-- Add activity_id column if it doesn't exist (for existing databases)
DO $$ BEGIN
ALTER TABLE l2_shares ADD COLUMN IF NOT EXISTS activity_id VARCHAR(128);
EXCEPTION WHEN others THEN NULL;
END $$;
-- Run cache: maps content-addressable run_id to output
-- run_id is a hash of (sorted inputs + recipe), making runs deterministic
CREATE TABLE IF NOT EXISTS run_cache (
run_id VARCHAR(64) PRIMARY KEY,
output_cid VARCHAR(64) NOT NULL,
ipfs_cid VARCHAR(128),
provenance_cid VARCHAR(128),
recipe VARCHAR(255) NOT NULL,
inputs JSONB NOT NULL,
actor_id VARCHAR(255),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Pending/running runs: tracks in-progress work for durability
-- Allows runs to survive restarts and be recovered
CREATE TABLE IF NOT EXISTS pending_runs (
run_id VARCHAR(64) PRIMARY KEY,
celery_task_id VARCHAR(128),
status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending, running, failed
recipe VARCHAR(255) NOT NULL,
inputs JSONB NOT NULL,
dag_json TEXT,
output_name VARCHAR(255),
actor_id VARCHAR(255),
error TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_pending_runs_status ON pending_runs(status);
CREATE INDEX IF NOT EXISTS idx_pending_runs_actor ON pending_runs(actor_id);
-- User storage backends (synced from L2 or configured locally)
CREATE TABLE IF NOT EXISTS storage_backends (
id SERIAL PRIMARY KEY,
actor_id VARCHAR(255) NOT NULL,
provider_type VARCHAR(50) NOT NULL, -- 'pinata', 'web3storage', 'nftstorage', 'infura', 'filebase', 'storj', 'local'
provider_name VARCHAR(255),
description TEXT,
config JSONB NOT NULL DEFAULT '{}',
capacity_gb INTEGER NOT NULL,
used_bytes BIGINT DEFAULT 0,
is_active BOOLEAN DEFAULT true,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
synced_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Storage pins tracking (what's pinned where)
CREATE TABLE IF NOT EXISTS storage_pins (
id SERIAL PRIMARY KEY,
cid VARCHAR(64) NOT NULL,
storage_id INTEGER NOT NULL REFERENCES storage_backends(id) ON DELETE CASCADE,
ipfs_cid VARCHAR(128),
pin_type VARCHAR(20) NOT NULL, -- 'user_content', 'donated', 'system'
size_bytes BIGINT,
pinned_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
UNIQUE(cid, storage_id)
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_item_types_cid ON item_types(cid);
CREATE INDEX IF NOT EXISTS idx_item_types_actor_id ON item_types(actor_id);
CREATE INDEX IF NOT EXISTS idx_item_types_type ON item_types(type);
CREATE INDEX IF NOT EXISTS idx_item_types_path ON item_types(path);
CREATE INDEX IF NOT EXISTS idx_pin_reasons_item_type ON pin_reasons(item_type_id);
CREATE INDEX IF NOT EXISTS idx_l2_shares_cid ON l2_shares(cid);
CREATE INDEX IF NOT EXISTS idx_l2_shares_actor_id ON l2_shares(actor_id);
CREATE INDEX IF NOT EXISTS idx_run_cache_output ON run_cache(output_cid);
CREATE INDEX IF NOT EXISTS idx_storage_backends_actor ON storage_backends(actor_id);
CREATE INDEX IF NOT EXISTS idx_storage_backends_type ON storage_backends(provider_type);
CREATE INDEX IF NOT EXISTS idx_storage_pins_hash ON storage_pins(cid);
CREATE INDEX IF NOT EXISTS idx_storage_pins_storage ON storage_pins(storage_id);
"""
async def init_db():
"""Initialize database connection pool and create schema."""
global pool
pool = await asyncpg.create_pool(DATABASE_URL)
async with pool.acquire() as conn:
await conn.execute(SCHEMA_SQL)
async def close_db():
"""Close database connection pool."""
global pool
if pool:
await pool.close()
pool = None
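# Illustrative usage sketch (not called by the app): the typical lifecycle,
# e.g. from a FastAPI lifespan hook or a worker entrypoint.
async def _example_lifecycle() -> None:
    await init_db()  # creates the pool and applies SCHEMA_SQL idempotently
    try:
        item = await create_cache_item("a" * 64)  # placeholder 64-char cid
        assert item["cid"] == "a" * 64
    finally:
        await close_db()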
# ============ Cache Items ============
async def create_cache_item(cid: str, ipfs_cid: Optional[str] = None) -> dict:
"""Create a cache item. Returns the created item."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO cache_items (cid, ipfs_cid)
VALUES ($1, $2)
ON CONFLICT (cid) DO UPDATE SET ipfs_cid = COALESCE($2, cache_items.ipfs_cid)
RETURNING cid, ipfs_cid, created_at
""",
cid, ipfs_cid
)
return dict(row)
async def get_cache_item(cid: str) -> Optional[dict]:
"""Get a cache item by content hash."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"SELECT cid, ipfs_cid, created_at FROM cache_items WHERE cid = $1",
cid
)
return dict(row) if row else None
async def update_cache_item_ipfs_cid(cid: str, ipfs_cid: str) -> bool:
"""Update the IPFS CID for a cache item."""
async with pool.acquire() as conn:
result = await conn.execute(
"UPDATE cache_items SET ipfs_cid = $2 WHERE cid = $1",
cid, ipfs_cid
)
return result == "UPDATE 1"
async def delete_cache_item(cid: str) -> bool:
"""Delete a cache item and all associated data (cascades)."""
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM cache_items WHERE cid = $1",
cid
)
return result == "DELETE 1"
async def list_cache_items(limit: int = 100, offset: int = 0) -> List[dict]:
"""List cache items with pagination."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT cid, ipfs_cid, created_at
FROM cache_items
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
""",
limit, offset
)
return [dict(row) for row in rows]
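# Illustrative sketch of the upsert semantics above: re-creating an existing
# cid never clears a stored ipfs_cid, because the ON CONFLICT clause keeps
# the old value via COALESCE. CID values here are placeholders.
async def _example_cache_item_upsert(cid: str) -> None:
    await create_cache_item(cid, ipfs_cid="bafyexamplecid")
    item = await create_cache_item(cid, ipfs_cid=None)  # ipfs_cid unchanged
    assert item["ipfs_cid"] == "bafyexamplecid"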
# ============ Item Types ============
async def add_item_type(
cid: str,
actor_id: str,
item_type: str,
path: Optional[str] = None,
description: Optional[str] = None,
source_type: Optional[str] = None,
source_url: Optional[str] = None,
source_note: Optional[str] = None,
) -> dict:
"""Add a type to a cache item for a user. Creates cache_item if needed."""
async with pool.acquire() as conn:
# Ensure cache_item exists
await conn.execute(
"INSERT INTO cache_items (cid) VALUES ($1) ON CONFLICT DO NOTHING",
cid
)
# Insert or update item_type
row = await conn.fetchrow(
"""
INSERT INTO item_types (cid, actor_id, type, path, description, source_type, source_url, source_note)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (cid, actor_id, type, path) DO UPDATE SET
description = COALESCE($5, item_types.description),
source_type = COALESCE($6, item_types.source_type),
source_url = COALESCE($7, item_types.source_url),
source_note = COALESCE($8, item_types.source_note)
RETURNING id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
""",
cid, actor_id, item_type, path, description, source_type, source_url, source_note
)
return dict(row)
async def get_item_types(cid: str, actor_id: Optional[str] = None) -> List[dict]:
"""Get types for a cache item, optionally filtered by user."""
async with pool.acquire() as conn:
if actor_id:
rows = await conn.fetch(
"""
SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types
WHERE cid = $1 AND actor_id = $2
ORDER BY created_at
""",
cid, actor_id
)
else:
rows = await conn.fetch(
"""
SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types
WHERE cid = $1
ORDER BY created_at
""",
cid
)
return [dict(row) for row in rows]
async def get_item_type(cid: str, actor_id: str, item_type: str, path: Optional[str] = None) -> Optional[dict]:
"""Get a specific type for a cache item and user."""
async with pool.acquire() as conn:
if path is None:
row = await conn.fetchrow(
"""
SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types
WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
""",
cid, actor_id, item_type
)
else:
row = await conn.fetchrow(
"""
SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types
WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path = $4
""",
cid, actor_id, item_type, path
)
return dict(row) if row else None
async def update_item_type(
item_type_id: int,
description: Optional[str] = None,
source_type: Optional[str] = None,
source_url: Optional[str] = None,
source_note: Optional[str] = None,
) -> bool:
"""Update an item type's metadata."""
async with pool.acquire() as conn:
result = await conn.execute(
"""
UPDATE item_types SET
description = COALESCE($2, description),
source_type = COALESCE($3, source_type),
source_url = COALESCE($4, source_url),
source_note = COALESCE($5, source_note)
WHERE id = $1
""",
item_type_id, description, source_type, source_url, source_note
)
return result == "UPDATE 1"
async def delete_item_type(cid: str, actor_id: str, item_type: str, path: Optional[str] = None) -> bool:
"""Delete a specific type from a cache item for a user."""
async with pool.acquire() as conn:
if path is None:
result = await conn.execute(
"DELETE FROM item_types WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL",
cid, actor_id, item_type
)
else:
result = await conn.execute(
"DELETE FROM item_types WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path = $4",
cid, actor_id, item_type, path
)
return result == "DELETE 1"
async def list_items_by_type(item_type: str, actor_id: Optional[str] = None, limit: int = 100, offset: int = 0) -> List[dict]:
"""List items of a specific type, optionally filtered by user."""
async with pool.acquire() as conn:
if actor_id:
rows = await conn.fetch(
"""
SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1 AND it.actor_id = $2
ORDER BY it.created_at DESC
LIMIT $3 OFFSET $4
""",
item_type, actor_id, limit, offset
)
else:
rows = await conn.fetch(
"""
SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1
ORDER BY it.created_at DESC
LIMIT $2 OFFSET $3
""",
item_type, limit, offset
)
return [dict(row) for row in rows]
async def get_item_by_path(item_type: str, path: str, actor_id: Optional[str] = None) -> Optional[dict]:
"""Get an item by its type and path (e.g., recipe:/effects/dog), optionally for a specific user."""
async with pool.acquire() as conn:
if actor_id:
row = await conn.fetchrow(
"""
SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1 AND it.path = $2 AND it.actor_id = $3
""",
item_type, path, actor_id
)
else:
row = await conn.fetchrow(
"""
SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1 AND it.path = $2
""",
item_type, path
)
return dict(row) if row else None
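# Illustrative sketch of type+path addressing (the docstring above uses
# "recipe:/effects/dog"): register a recipe under a path for a user, then
# resolve it back to its cid. The actor id is a hypothetical value in the
# @username@server format from the schema comment.
async def _example_path_lookup(cid: str) -> None:
    actor = "@alice@example.org"
    await add_item_type(cid, actor, "recipe", path="/effects/dog")
    found = await get_item_by_path("recipe", "/effects/dog", actor_id=actor)
    assert found is not None and found["cid"] == cid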
# ============ Pinning ============
async def pin_item_type(item_type_id: int, reason: str) -> bool:
"""Pin an item type with a reason."""
async with pool.acquire() as conn:
async with conn.transaction():
# Set pinned flag
await conn.execute(
"UPDATE item_types SET pinned = TRUE WHERE id = $1",
item_type_id
)
# Add pin reason
await conn.execute(
"INSERT INTO pin_reasons (item_type_id, reason) VALUES ($1, $2)",
item_type_id, reason
)
return True
async def unpin_item_type(item_type_id: int, reason: Optional[str] = None) -> bool:
"""Remove a pin reason from an item type. If no reasons left, unpins the item."""
async with pool.acquire() as conn:
async with conn.transaction():
if reason:
# Remove specific reason
await conn.execute(
"DELETE FROM pin_reasons WHERE item_type_id = $1 AND reason = $2",
item_type_id, reason
)
else:
# Remove all reasons
await conn.execute(
"DELETE FROM pin_reasons WHERE item_type_id = $1",
item_type_id
)
# Check if any reasons remain
count = await conn.fetchval(
"SELECT COUNT(*) FROM pin_reasons WHERE item_type_id = $1",
item_type_id
)
if count == 0:
await conn.execute(
"UPDATE item_types SET pinned = FALSE WHERE id = $1",
item_type_id
)
return True
async def get_pin_reasons(item_type_id: int) -> List[dict]:
"""Get all pin reasons for an item type."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"SELECT id, reason, created_at FROM pin_reasons WHERE item_type_id = $1 ORDER BY created_at",
item_type_id
)
return [dict(row) for row in rows]
async def is_item_pinned(cid: str, item_type: Optional[str] = None) -> tuple[bool, List[str]]:
"""Check if any type of a cache item is pinned. Returns (is_pinned, reasons)."""
async with pool.acquire() as conn:
if item_type:
rows = await conn.fetch(
"""
SELECT pr.reason
FROM pin_reasons pr
JOIN item_types it ON pr.item_type_id = it.id
WHERE it.cid = $1 AND it.type = $2 AND it.pinned = TRUE
""",
cid, item_type
)
else:
rows = await conn.fetch(
"""
SELECT pr.reason
FROM pin_reasons pr
JOIN item_types it ON pr.item_type_id = it.id
WHERE it.cid = $1 AND it.pinned = TRUE
""",
cid
)
reasons = [row["reason"] for row in rows]
return len(reasons) > 0, reasons
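# Illustrative sketch of the reason-counted pin lifecycle above: an item
# type stays pinned until its last pin reason is removed. The reason
# strings here are hypothetical.
async def _example_pin_lifecycle(item_type_id: int) -> None:
    await pin_item_type(item_type_id, "user_favorite")
    await pin_item_type(item_type_id, "l2_shared")
    await unpin_item_type(item_type_id, "user_favorite")
    remaining = await get_pin_reasons(item_type_id)
    assert [r["reason"] for r in remaining] == ["l2_shared"]  # still pinned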
# ============ L2 Shares ============
async def add_l2_share(
cid: str,
actor_id: str,
l2_server: str,
asset_name: str,
content_type: str,
) -> dict:
"""Add or update an L2 share for a user."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO l2_shares (cid, actor_id, l2_server, asset_name, content_type, last_synced_at)
VALUES ($1, $2, $3, $4, $5, NOW())
ON CONFLICT (cid, actor_id, l2_server, content_type) DO UPDATE SET
asset_name = $4,
last_synced_at = NOW()
RETURNING id, cid, actor_id, l2_server, asset_name, content_type, published_at, last_synced_at
""",
cid, actor_id, l2_server, asset_name, content_type
)
return dict(row)
async def get_l2_shares(cid: str, actor_id: Optional[str] = None) -> List[dict]:
"""Get L2 shares for a cache item, optionally filtered by user."""
async with pool.acquire() as conn:
if actor_id:
rows = await conn.fetch(
"""
SELECT id, cid, actor_id, l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares
WHERE cid = $1 AND actor_id = $2
ORDER BY published_at
""",
cid, actor_id
)
else:
rows = await conn.fetch(
"""
SELECT id, cid, actor_id, l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares
WHERE cid = $1
ORDER BY published_at
""",
cid
)
return [dict(row) for row in rows]
async def delete_l2_share(cid: str, actor_id: str, l2_server: str, content_type: str) -> bool:
"""Delete an L2 share for a user."""
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM l2_shares WHERE cid = $1 AND actor_id = $2 AND l2_server = $3 AND content_type = $4",
cid, actor_id, l2_server, content_type
)
return result == "DELETE 1"
# ============ Cache Item Cleanup ============
async def has_remaining_references(cid: str) -> bool:
"""Check if a cache item has any remaining item_types or l2_shares."""
async with pool.acquire() as conn:
item_types_count = await conn.fetchval(
"SELECT COUNT(*) FROM item_types WHERE cid = $1",
cid
)
if item_types_count > 0:
return True
l2_shares_count = await conn.fetchval(
"SELECT COUNT(*) FROM l2_shares WHERE cid = $1",
cid
)
return l2_shares_count > 0
async def cleanup_orphaned_cache_item(cid: str) -> bool:
"""Delete a cache item if it has no remaining references. Returns True if deleted."""
async with pool.acquire() as conn:
# Only delete if no item_types or l2_shares reference it
result = await conn.execute(
"""
DELETE FROM cache_items
WHERE cid = $1
AND NOT EXISTS (SELECT 1 FROM item_types WHERE cid = $1)
AND NOT EXISTS (SELECT 1 FROM l2_shares WHERE cid = $1)
""",
cid
)
return result == "DELETE 1"
# ============ High-Level Metadata Functions ============
# These provide a compatible interface to the old JSON-based save_cache_meta/load_cache_meta
async def save_item_metadata(
cid: str,
actor_id: str,
item_type: str = "media",
filename: Optional[str] = None,
description: Optional[str] = None,
source_type: Optional[str] = None,
source_url: Optional[str] = None,
source_note: Optional[str] = None,
pinned: bool = False,
pin_reason: Optional[str] = None,
tags: Optional[List[str]] = None,
folder: Optional[str] = None,
collections: Optional[List[str]] = None,
**extra_metadata
) -> dict:
"""
Save or update item metadata in the database.
Returns a dict with the item metadata (compatible with old JSON format).
"""
# Build metadata JSONB for extra fields
metadata = {}
if tags:
metadata["tags"] = tags
if folder:
metadata["folder"] = folder
if collections:
metadata["collections"] = collections
metadata.update(extra_metadata)
async with pool.acquire() as conn:
# Ensure cache_item exists
await conn.execute(
"INSERT INTO cache_items (cid) VALUES ($1) ON CONFLICT DO NOTHING",
cid
)
# Upsert item_type
row = await conn.fetchrow(
"""
INSERT INTO item_types (cid, actor_id, type, description, source_type, source_url, source_note, pinned, filename, metadata)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
ON CONFLICT (cid, actor_id, type, path) DO UPDATE SET
description = COALESCE(EXCLUDED.description, item_types.description),
source_type = COALESCE(EXCLUDED.source_type, item_types.source_type),
source_url = COALESCE(EXCLUDED.source_url, item_types.source_url),
source_note = COALESCE(EXCLUDED.source_note, item_types.source_note),
pinned = EXCLUDED.pinned,
filename = COALESCE(EXCLUDED.filename, item_types.filename),
metadata = item_types.metadata || EXCLUDED.metadata
RETURNING id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at
""",
cid, actor_id, item_type, description, source_type, source_url, source_note, pinned, filename, _json.dumps(metadata)
)
item_type_id = row["id"]
# Handle pinning
if pinned and pin_reason:
            # Add pin reason if not already present (pin_reasons has no
            # unique constraint, so ON CONFLICT cannot deduplicate here)
            await conn.execute(
                """
                INSERT INTO pin_reasons (item_type_id, reason)
                SELECT $1, $2
                WHERE NOT EXISTS (
                    SELECT 1 FROM pin_reasons
                    WHERE item_type_id = $1 AND reason = $2
                )
                """,
                item_type_id, pin_reason
            )
# Build response dict (compatible with old format)
result = {
"uploader": actor_id,
"uploaded_at": row["created_at"].isoformat() if row["created_at"] else None,
"filename": row["filename"],
"type": row["type"],
"description": row["description"],
"pinned": row["pinned"],
}
# Add origin if present
if row["source_type"] or row["source_url"] or row["source_note"]:
result["origin"] = {
"type": row["source_type"],
"url": row["source_url"],
"note": row["source_note"]
}
# Add metadata fields
if row["metadata"]:
meta = row["metadata"] if isinstance(row["metadata"], dict) else _json.loads(row["metadata"])
if meta.get("tags"):
result["tags"] = meta["tags"]
if meta.get("folder"):
result["folder"] = meta["folder"]
if meta.get("collections"):
result["collections"] = meta["collections"]
# Get pin reasons
if row["pinned"]:
reasons = await conn.fetch(
"SELECT reason FROM pin_reasons WHERE item_type_id = $1",
item_type_id
)
if reasons:
result["pin_reason"] = reasons[0]["reason"]
return result
async def load_item_metadata(cid: str, actor_id: Optional[str] = None) -> dict:
"""
Load item metadata from the database.
If actor_id is provided, returns metadata for that user's view of the item.
Otherwise, returns combined metadata from all users (for backwards compat).
Returns a dict compatible with old JSON format.
"""
async with pool.acquire() as conn:
# Get cache item
cache_item = await conn.fetchrow(
"SELECT cid, ipfs_cid, created_at FROM cache_items WHERE cid = $1",
cid
)
if not cache_item:
return {}
# Get item types
if actor_id:
item_types = await conn.fetch(
"""
SELECT id, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at
FROM item_types WHERE cid = $1 AND actor_id = $2
ORDER BY created_at
""",
cid, actor_id
)
else:
item_types = await conn.fetch(
"""
SELECT id, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at
FROM item_types WHERE cid = $1
ORDER BY created_at
""",
cid
)
if not item_types:
return {"uploaded_at": cache_item["created_at"].isoformat() if cache_item["created_at"] else None}
# Use first item type as primary (for backwards compat)
primary = item_types[0]
result = {
"uploader": primary["actor_id"],
"uploaded_at": primary["created_at"].isoformat() if primary["created_at"] else None,
"filename": primary["filename"],
"type": primary["type"],
"description": primary["description"],
"pinned": any(it["pinned"] for it in item_types),
}
# Add origin if present
if primary["source_type"] or primary["source_url"] or primary["source_note"]:
result["origin"] = {
"type": primary["source_type"],
"url": primary["source_url"],
"note": primary["source_note"]
}
# Add metadata fields
if primary["metadata"]:
meta = primary["metadata"] if isinstance(primary["metadata"], dict) else _json.loads(primary["metadata"])
if meta.get("tags"):
result["tags"] = meta["tags"]
if meta.get("folder"):
result["folder"] = meta["folder"]
if meta.get("collections"):
result["collections"] = meta["collections"]
# Get pin reasons for pinned items
for it in item_types:
if it["pinned"]:
reasons = await conn.fetch(
"SELECT reason FROM pin_reasons WHERE item_type_id = $1",
it["id"]
)
if reasons:
result["pin_reason"] = reasons[0]["reason"]
break
# Get L2 shares
if actor_id:
shares = await conn.fetch(
"""
SELECT l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares WHERE cid = $1 AND actor_id = $2
""",
cid, actor_id
)
else:
shares = await conn.fetch(
"""
SELECT l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares WHERE cid = $1
""",
cid
)
if shares:
result["l2_shares"] = [
{
"l2_server": s["l2_server"],
"asset_name": s["asset_name"],
"activity_id": s["activity_id"],
"content_type": s["content_type"],
"published_at": s["published_at"].isoformat() if s["published_at"] else None,
"last_synced_at": s["last_synced_at"].isoformat() if s["last_synced_at"] else None,
}
for s in shares
]
# For backwards compat, also set "published" if shared
result["published"] = {
"to_l2": True,
"asset_name": shares[0]["asset_name"],
"activity_id": shares[0]["activity_id"],
"l2_server": shares[0]["l2_server"],
}
return result
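# Illustrative round trip through the compatibility layer above, assuming an
# initialized pool; the actor id, filename, and tag are hypothetical.
async def _example_metadata_round_trip(cid: str) -> None:
    actor = "@alice@example.org"
    await save_item_metadata(cid, actor, "media", filename="dog.png", tags=["pets"])
    meta = await load_item_metadata(cid, actor)
    assert meta["filename"] == "dog.png" and meta["tags"] == ["pets"]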
async def update_item_metadata(
cid: str,
actor_id: str,
item_type: str = "media",
**updates
) -> dict:
"""
Update specific fields of item metadata.
Returns updated metadata dict.
"""
# Extract known fields from updates
description = updates.pop("description", None)
source_type = updates.pop("source_type", None)
source_url = updates.pop("source_url", None)
source_note = updates.pop("source_note", None)
# Handle origin dict format
origin = updates.pop("origin", None)
if origin:
source_type = origin.get("type", source_type)
source_url = origin.get("url", source_url)
source_note = origin.get("note", source_note)
pinned = updates.pop("pinned", None)
pin_reason = updates.pop("pin_reason", None)
filename = updates.pop("filename", None)
tags = updates.pop("tags", None)
folder = updates.pop("folder", None)
collections = updates.pop("collections", None)
async with pool.acquire() as conn:
# Get existing item_type
existing = await conn.fetchrow(
"""
SELECT id, metadata FROM item_types
WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
""",
cid, actor_id, item_type
)
if not existing:
# Create new entry
return await save_item_metadata(
cid, actor_id, item_type,
filename=filename, description=description,
source_type=source_type, source_url=source_url, source_note=source_note,
pinned=pinned or False, pin_reason=pin_reason,
tags=tags, folder=folder, collections=collections,
**updates
)
# Build update query dynamically
set_parts = []
params = [cid, actor_id, item_type]
param_idx = 4
if description is not None:
set_parts.append(f"description = ${param_idx}")
params.append(description)
param_idx += 1
if source_type is not None:
set_parts.append(f"source_type = ${param_idx}")
params.append(source_type)
param_idx += 1
if source_url is not None:
set_parts.append(f"source_url = ${param_idx}")
params.append(source_url)
param_idx += 1
if source_note is not None:
set_parts.append(f"source_note = ${param_idx}")
params.append(source_note)
param_idx += 1
if pinned is not None:
set_parts.append(f"pinned = ${param_idx}")
params.append(pinned)
param_idx += 1
if filename is not None:
set_parts.append(f"filename = ${param_idx}")
params.append(filename)
param_idx += 1
# Handle metadata updates
current_metadata = existing["metadata"] if isinstance(existing["metadata"], dict) else (_json.loads(existing["metadata"]) if existing["metadata"] else {})
if tags is not None:
current_metadata["tags"] = tags
if folder is not None:
current_metadata["folder"] = folder
if collections is not None:
current_metadata["collections"] = collections
current_metadata.update(updates)
if current_metadata:
set_parts.append(f"metadata = ${param_idx}")
params.append(_json.dumps(current_metadata))
param_idx += 1
if set_parts:
query = f"""
UPDATE item_types SET {', '.join(set_parts)}
WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
"""
await conn.execute(query, *params)
# Handle pin reason
if pinned and pin_reason:
            await conn.execute(
                """
                INSERT INTO pin_reasons (item_type_id, reason)
                SELECT $1, $2
                WHERE NOT EXISTS (
                    SELECT 1 FROM pin_reasons
                    WHERE item_type_id = $1 AND reason = $2
                )
                """,
                existing["id"], pin_reason
            )
return await load_item_metadata(cid, actor_id)
async def save_l2_share(
cid: str,
actor_id: str,
l2_server: str,
asset_name: str,
content_type: str = "media",
activity_id: Optional[str] = None
) -> dict:
"""Save an L2 share and return share info."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO l2_shares (cid, actor_id, l2_server, asset_name, activity_id, content_type, last_synced_at)
VALUES ($1, $2, $3, $4, $5, $6, NOW())
ON CONFLICT (cid, actor_id, l2_server, content_type) DO UPDATE SET
asset_name = EXCLUDED.asset_name,
activity_id = COALESCE(EXCLUDED.activity_id, l2_shares.activity_id),
last_synced_at = NOW()
RETURNING l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
""",
cid, actor_id, l2_server, asset_name, activity_id, content_type
)
return {
"l2_server": row["l2_server"],
"asset_name": row["asset_name"],
"activity_id": row["activity_id"],
"content_type": row["content_type"],
"published_at": row["published_at"].isoformat() if row["published_at"] else None,
"last_synced_at": row["last_synced_at"].isoformat() if row["last_synced_at"] else None,
}
async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit: int = 100, offset: int = 0) -> List[dict]:
"""Get all items for a user, optionally filtered by type. Deduplicates by cid."""
async with pool.acquire() as conn:
if item_type:
rows = await conn.fetch(
"""
SELECT * FROM (
SELECT DISTINCT ON (it.cid)
it.cid, it.type, it.description, it.filename, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.actor_id = $1 AND it.type = $2
ORDER BY it.cid, it.created_at DESC
) deduped
ORDER BY created_at DESC
LIMIT $3 OFFSET $4
""",
actor_id, item_type, limit, offset
)
else:
rows = await conn.fetch(
"""
SELECT * FROM (
SELECT DISTINCT ON (it.cid)
it.cid, it.type, it.description, it.filename, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.actor_id = $1
ORDER BY it.cid, it.created_at DESC
) deduped
ORDER BY created_at DESC
LIMIT $2 OFFSET $3
""",
actor_id, limit, offset
)
return [
{
"cid": r["cid"],
"type": r["type"],
"description": r["description"],
"filename": r["filename"],
"pinned": r["pinned"],
"created_at": r["created_at"].isoformat() if r["created_at"] else None,
"ipfs_cid": r["ipfs_cid"],
}
for r in rows
]
async def count_user_items(actor_id: str, item_type: Optional[str] = None) -> int:
"""Count unique items (by cid) for a user."""
async with pool.acquire() as conn:
if item_type:
return await conn.fetchval(
"SELECT COUNT(DISTINCT cid) FROM item_types WHERE actor_id = $1 AND type = $2",
actor_id, item_type
)
else:
return await conn.fetchval(
"SELECT COUNT(DISTINCT cid) FROM item_types WHERE actor_id = $1",
actor_id
)
# ============ Run Cache ============
async def get_run_cache(run_id: str) -> Optional[dict]:
"""Get cached run result by content-addressable run_id."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
SELECT run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
FROM run_cache WHERE run_id = $1
""",
run_id
)
if row:
return {
"run_id": row["run_id"],
"output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"],
"recipe": row["recipe"],
"inputs": row["inputs"],
"actor_id": row["actor_id"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
}
return None
async def save_run_cache(
run_id: str,
output_cid: str,
recipe: str,
inputs: List[str],
ipfs_cid: Optional[str] = None,
provenance_cid: Optional[str] = None,
actor_id: Optional[str] = None,
) -> dict:
"""Save run result to cache. Updates if run_id already exists."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO run_cache (run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id)
VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (run_id) DO UPDATE SET
output_cid = EXCLUDED.output_cid,
ipfs_cid = COALESCE(EXCLUDED.ipfs_cid, run_cache.ipfs_cid),
provenance_cid = COALESCE(EXCLUDED.provenance_cid, run_cache.provenance_cid)
RETURNING run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
""",
run_id, output_cid, ipfs_cid, provenance_cid, recipe, _json.dumps(inputs), actor_id
)
return {
"run_id": row["run_id"],
"output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"],
"recipe": row["recipe"],
"inputs": row["inputs"],
"actor_id": row["actor_id"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
}
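# Illustrative memoization sketch: consult the run cache before doing work
# and record the output afterwards. Deriving run_id is assumed to happen
# upstream (see the schema comment); the output cid here is a placeholder.
async def _example_memoized_run(run_id: str, recipe: str, inputs: List[str]) -> str:
    cached = await get_run_cache(run_id)
    if cached:
        return cached["output_cid"]
    output_cid = "0" * 64  # placeholder for the recipe's real output
    await save_run_cache(run_id, output_cid, recipe, inputs)
    return output_cid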
async def get_run_by_output(output_cid: str) -> Optional[dict]:
"""Get run cache entry by output hash."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
SELECT run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
FROM run_cache WHERE output_cid = $1
""",
output_cid
)
if row:
return {
"run_id": row["run_id"],
"output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"],
"recipe": row["recipe"],
"inputs": row["inputs"],
"actor_id": row["actor_id"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
}
return None
def _parse_inputs(inputs_value):
"""Parse inputs from database - may be JSON string, list, or None."""
if inputs_value is None:
return []
if isinstance(inputs_value, list):
return inputs_value
if isinstance(inputs_value, str):
try:
parsed = _json.loads(inputs_value)
if isinstance(parsed, list):
return parsed
return []
except (_json.JSONDecodeError, TypeError):
return []
return []
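# For example (asyncpg may hand JSONB back either decoded or as raw text,
# depending on codec configuration):
#   _parse_inputs(None)          -> []
#   _parse_inputs(["a", "b"])    -> ["a", "b"]
#   _parse_inputs('["a", "b"]')  -> ["a", "b"]
#   _parse_inputs("not json")    -> []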
async def list_runs_by_actor(actor_id: str, offset: int = 0, limit: int = 20) -> List[dict]:
"""List completed runs for a user, ordered by creation time (newest first)."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
FROM run_cache
WHERE actor_id = $1
ORDER BY created_at DESC
LIMIT $2 OFFSET $3
""",
actor_id, limit, offset
)
return [
{
"run_id": row["run_id"],
"output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"],
"recipe": row["recipe"],
"inputs": _parse_inputs(row["inputs"]),
"actor_id": row["actor_id"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"status": "completed",
}
for row in rows
]
# ============ Storage Backends ============
async def get_user_storage(actor_id: str) -> List[dict]:
"""Get all storage backends for a user."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""SELECT id, actor_id, provider_type, provider_name, description, config,
capacity_gb, used_bytes, is_active, created_at, synced_at
FROM storage_backends WHERE actor_id = $1
ORDER BY provider_type, created_at""",
actor_id
)
return [dict(row) for row in rows]
async def get_user_storage_by_type(actor_id: str, provider_type: str) -> List[dict]:
"""Get storage backends of a specific type for a user."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""SELECT id, actor_id, provider_type, provider_name, description, config,
capacity_gb, used_bytes, is_active, created_at, synced_at
FROM storage_backends WHERE actor_id = $1 AND provider_type = $2
ORDER BY created_at""",
actor_id, provider_type
)
return [dict(row) for row in rows]
async def get_storage_by_id(storage_id: int) -> Optional[dict]:
"""Get a storage backend by ID."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""SELECT id, actor_id, provider_type, provider_name, description, config,
capacity_gb, used_bytes, is_active, created_at, synced_at
FROM storage_backends WHERE id = $1""",
storage_id
)
return dict(row) if row else None
async def add_user_storage(
actor_id: str,
provider_type: str,
provider_name: str,
config: dict,
capacity_gb: int,
description: Optional[str] = None
) -> Optional[int]:
"""Add a storage backend for a user. Returns storage ID."""
async with pool.acquire() as conn:
try:
row = await conn.fetchrow(
"""INSERT INTO storage_backends (actor_id, provider_type, provider_name, description, config, capacity_gb)
VALUES ($1, $2, $3, $4, $5, $6)
RETURNING id""",
actor_id, provider_type, provider_name, description, _json.dumps(config), capacity_gb
)
return row["id"] if row else None
except Exception:
return None
async def update_user_storage(
storage_id: int,
provider_name: Optional[str] = None,
description: Optional[str] = None,
config: Optional[dict] = None,
capacity_gb: Optional[int] = None,
is_active: Optional[bool] = None
) -> bool:
"""Update a storage backend."""
updates = []
params = []
param_num = 1
if provider_name is not None:
updates.append(f"provider_name = ${param_num}")
params.append(provider_name)
param_num += 1
if description is not None:
updates.append(f"description = ${param_num}")
params.append(description)
param_num += 1
if config is not None:
updates.append(f"config = ${param_num}")
params.append(_json.dumps(config))
param_num += 1
if capacity_gb is not None:
updates.append(f"capacity_gb = ${param_num}")
params.append(capacity_gb)
param_num += 1
if is_active is not None:
updates.append(f"is_active = ${param_num}")
params.append(is_active)
param_num += 1
if not updates:
return False
updates.append("synced_at = NOW()")
params.append(storage_id)
async with pool.acquire() as conn:
result = await conn.execute(
f"UPDATE storage_backends SET {', '.join(updates)} WHERE id = ${param_num}",
*params
)
return "UPDATE 1" in result
async def remove_user_storage(storage_id: int) -> bool:
"""Remove a storage backend. Cascades to storage_pins."""
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM storage_backends WHERE id = $1",
storage_id
)
return "DELETE 1" in result
async def get_storage_usage(storage_id: int) -> dict:
"""Get storage usage stats."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""SELECT
COUNT(*) as pin_count,
COALESCE(SUM(size_bytes), 0) as used_bytes
FROM storage_pins WHERE storage_id = $1""",
storage_id
)
return {"pin_count": row["pin_count"], "used_bytes": row["used_bytes"]}
async def get_all_active_storage() -> List[dict]:
"""Get all active storage backends (for distributed pinning)."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""SELECT sb.id, sb.actor_id, sb.provider_type, sb.provider_name, sb.description,
sb.config, sb.capacity_gb, sb.is_active, sb.created_at, sb.synced_at,
COALESCE(SUM(sp.size_bytes), 0) as used_bytes,
COUNT(sp.id) as pin_count
FROM storage_backends sb
LEFT JOIN storage_pins sp ON sb.id = sp.storage_id
WHERE sb.is_active = true
GROUP BY sb.id
ORDER BY sb.provider_type, sb.created_at"""
)
return [dict(row) for row in rows]
async def add_storage_pin(
cid: str,
storage_id: int,
ipfs_cid: Optional[str],
pin_type: str,
size_bytes: int
) -> Optional[int]:
"""Add a pin record. Returns pin ID."""
async with pool.acquire() as conn:
try:
row = await conn.fetchrow(
"""INSERT INTO storage_pins (cid, storage_id, ipfs_cid, pin_type, size_bytes)
VALUES ($1, $2, $3, $4, $5)
ON CONFLICT (cid, storage_id) DO UPDATE SET
ipfs_cid = EXCLUDED.ipfs_cid,
pin_type = EXCLUDED.pin_type,
size_bytes = EXCLUDED.size_bytes,
pinned_at = NOW()
RETURNING id""",
cid, storage_id, ipfs_cid, pin_type, size_bytes
)
return row["id"] if row else None
except Exception:
return None
async def remove_storage_pin(cid: str, storage_id: int) -> bool:
"""Remove a pin record."""
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM storage_pins WHERE cid = $1 AND storage_id = $2",
cid, storage_id
)
return "DELETE 1" in result
async def get_pins_for_content(cid: str) -> List[dict]:
"""Get all storage locations where content is pinned."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""SELECT sp.*, sb.provider_type, sb.provider_name, sb.actor_id
FROM storage_pins sp
JOIN storage_backends sb ON sp.storage_id = sb.id
WHERE sp.cid = $1""",
cid
)
return [dict(row) for row in rows]
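# Illustrative sketch of distributed-pinning bookkeeping: record a pin on a
# user's backend, then enumerate everywhere the content is pinned. The
# pin_type value comes from the schema comment; the size is a placeholder.
async def _example_pin_bookkeeping(cid: str, storage_id: int) -> None:
    await add_storage_pin(cid, storage_id, ipfs_cid=None,
                          pin_type="user_content", size_bytes=1024)
    locations = await get_pins_for_content(cid)
    assert any(p["storage_id"] == storage_id for p in locations)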
# ============ Pending Runs ============
async def create_pending_run(
run_id: str,
celery_task_id: str,
recipe: str,
inputs: List[str],
actor_id: str,
dag_json: Optional[str] = None,
output_name: Optional[str] = None,
) -> dict:
"""Create a pending run record for durability."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO pending_runs (run_id, celery_task_id, status, recipe, inputs, dag_json, output_name, actor_id)
VALUES ($1, $2, 'running', $3, $4, $5, $6, $7)
ON CONFLICT (run_id) DO UPDATE SET
celery_task_id = EXCLUDED.celery_task_id,
status = 'running',
updated_at = NOW()
RETURNING run_id, celery_task_id, status, recipe, inputs, dag_json, output_name, actor_id, created_at, updated_at
""",
run_id, celery_task_id, recipe, _json.dumps(inputs), dag_json, output_name, actor_id
)
return {
"run_id": row["run_id"],
"celery_task_id": row["celery_task_id"],
"status": row["status"],
"recipe": row["recipe"],
"inputs": row["inputs"],
"dag_json": row["dag_json"],
"output_name": row["output_name"],
"actor_id": row["actor_id"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
}
async def get_pending_run(run_id: str) -> Optional[dict]:
"""Get a pending run by ID."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
SELECT run_id, celery_task_id, status, recipe, inputs, dag_json, output_name, actor_id, error, created_at, updated_at
FROM pending_runs WHERE run_id = $1
""",
run_id
)
if row:
            # asyncpg returns JSONB as a str unless a codec is registered, so parse defensively
inputs = row["inputs"]
if isinstance(inputs, str):
inputs = _json.loads(inputs)
return {
"run_id": row["run_id"],
"celery_task_id": row["celery_task_id"],
"status": row["status"],
"recipe": row["recipe"],
"inputs": inputs,
"dag_json": row["dag_json"],
"output_name": row["output_name"],
"actor_id": row["actor_id"],
"error": row["error"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
}
return None
async def list_pending_runs(actor_id: Optional[str] = None, status: Optional[str] = None) -> List[dict]:
"""List pending runs, optionally filtered by actor and/or status."""
async with pool.acquire() as conn:
conditions = []
params = []
param_idx = 1
if actor_id:
conditions.append(f"actor_id = ${param_idx}")
params.append(actor_id)
param_idx += 1
if status:
conditions.append(f"status = ${param_idx}")
params.append(status)
param_idx += 1
where_clause = " AND ".join(conditions) if conditions else "TRUE"
rows = await conn.fetch(
f"""
SELECT run_id, celery_task_id, status, recipe, inputs, output_name, actor_id, error, created_at, updated_at
FROM pending_runs
WHERE {where_clause}
ORDER BY created_at DESC
""",
*params
)
results = []
for row in rows:
# Parse inputs if it's a string
inputs = row["inputs"]
if isinstance(inputs, str):
inputs = _json.loads(inputs)
results.append({
"run_id": row["run_id"],
"celery_task_id": row["celery_task_id"],
"status": row["status"],
"recipe": row["recipe"],
"inputs": inputs,
"output_name": row["output_name"],
"actor_id": row["actor_id"],
"error": row["error"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
})
return results
async def update_pending_run_status(run_id: str, status: str, error: Optional[str] = None) -> bool:
"""Update the status of a pending run."""
async with pool.acquire() as conn:
if error:
result = await conn.execute(
"UPDATE pending_runs SET status = $2, error = $3, updated_at = NOW() WHERE run_id = $1",
run_id, status, error
)
else:
result = await conn.execute(
"UPDATE pending_runs SET status = $2, updated_at = NOW() WHERE run_id = $1",
run_id, status
)
return "UPDATE 1" in result
async def complete_pending_run(run_id: str) -> bool:
"""Remove a pending run after it completes (moves to run_cache)."""
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM pending_runs WHERE run_id = $1",
run_id
)
return "DELETE 1" in result
async def get_stale_pending_runs(older_than_hours: int = 24) -> List[dict]:
"""Get pending runs that haven't been updated recently (for recovery)."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT run_id, celery_task_id, status, recipe, inputs, dag_json, output_name, actor_id, created_at, updated_at
FROM pending_runs
WHERE status IN ('pending', 'running')
            AND updated_at < NOW() - make_interval(hours => $1)
ORDER BY created_at
""",
older_than_hours
)
return [
{
"run_id": row["run_id"],
"celery_task_id": row["celery_task_id"],
"status": row["status"],
"recipe": row["recipe"],
"inputs": row["inputs"],
"dag_json": row["dag_json"],
"output_name": row["output_name"],
"actor_id": row["actor_id"],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
}
for row in rows
]
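# Illustrative recovery sketch using the durability tables above: find runs
# that stalled (e.g. a worker died), mark them failed, and let callers
# resubmit. The threshold mirrors the default above; the error text is a
# placeholder.
async def _example_recover_stale_runs() -> None:
    for run in await get_stale_pending_runs(older_than_hours=24):
        await update_pending_run_status(
            run["run_id"], "failed", error="stale: worker lost"
        )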