Add content-addressable runs — runs identified by a hash of inputs + recipe.

- Add run_cache table for fast run_id -> output lookup
- compute_run_id() computes a deterministic run_id from inputs + recipe
- create_run checks the L1 cache, then L2, before dispatching to Celery
- If the output exists on L2 but not L1, it is pulled from IPFS
- Run results are saved to the cache on completion

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
99
database.py
99
database.py
@@ -71,6 +71,19 @@ CREATE TABLE IF NOT EXISTS l2_shares (
|
||||
UNIQUE(content_hash, actor_id, l2_server, content_type)
|
||||
);
|
||||
|
||||
-- Run cache: maps content-addressable run_id to output
|
||||
-- run_id is a hash of (sorted inputs + recipe), making runs deterministic
|
||||
CREATE TABLE IF NOT EXISTS run_cache (
|
||||
run_id VARCHAR(64) PRIMARY KEY,
|
||||
output_hash VARCHAR(64) NOT NULL,
|
||||
ipfs_cid VARCHAR(128),
|
||||
provenance_cid VARCHAR(128),
|
||||
recipe VARCHAR(255) NOT NULL,
|
||||
inputs JSONB NOT NULL,
|
||||
actor_id VARCHAR(255),
|
||||
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Indexes
|
||||
CREATE INDEX IF NOT EXISTS idx_item_types_content_hash ON item_types(content_hash);
|
||||
CREATE INDEX IF NOT EXISTS idx_item_types_actor_id ON item_types(actor_id);
|
||||
@@ -79,6 +92,7 @@ CREATE INDEX IF NOT EXISTS idx_item_types_path ON item_types(path);
|
||||
CREATE INDEX IF NOT EXISTS idx_pin_reasons_item_type ON pin_reasons(item_type_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_l2_shares_content_hash ON l2_shares(content_hash);
|
||||
CREATE INDEX IF NOT EXISTS idx_l2_shares_actor_id ON l2_shares(actor_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_run_cache_output ON run_cache(output_hash);
|
||||
"""
|
||||
|
||||
|
||||
@@ -988,3 +1002,88 @@ async def count_user_items(actor_id: str, item_type: Optional[str] = None) -> in
|
||||
"SELECT COUNT(DISTINCT content_hash) FROM item_types WHERE actor_id = $1",
|
||||
actor_id
|
||||
)
|
||||
|
||||
|
||||
# ============ Run Cache ============
|
||||
|
||||
async def get_run_cache(run_id: str) -> Optional[dict]:
    """Look up a cached run result by its content-addressable run_id.

    Args:
        run_id: Deterministic hash of (sorted inputs + recipe) identifying the run.

    Returns:
        A plain dict of the run_cache row (with ``created_at`` serialized to an
        ISO-8601 string), or ``None`` when no cache entry exists for *run_id*.
    """
    query = """
            SELECT run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
            FROM run_cache WHERE run_id = $1
            """
    async with pool.acquire() as conn:
        record = await conn.fetchrow(query, run_id)

    if record is None:
        return None

    # Copy the scalar columns verbatim; only created_at needs conversion.
    result = {col: record[col] for col in (
        "run_id", "output_hash", "ipfs_cid", "provenance_cid",
        "recipe", "inputs", "actor_id",
    )}
    timestamp = record["created_at"]
    result["created_at"] = timestamp.isoformat() if timestamp else None
    return result
|
||||
|
||||
|
||||
async def save_run_cache(
    run_id: str,
    output_hash: str,
    recipe: str,
    inputs: List[str],
    ipfs_cid: Optional[str] = None,
    provenance_cid: Optional[str] = None,
    actor_id: Optional[str] = None,
) -> dict:
    """Save run result to cache. Updates if run_id already exists.

    On conflict the output_hash is replaced, while the CID columns are only
    overwritten when a non-NULL value is supplied (COALESCE keeps the stored
    value otherwise); recipe, inputs and actor_id retain their original values.

    Args:
        run_id: Content-addressable identifier for the run.
        output_hash: Hash of the run's output.
        recipe: Name/identifier of the recipe that produced the output.
        inputs: Input hashes; stored as a JSONB array.
        ipfs_cid: Optional IPFS CID of the output.
        provenance_cid: Optional IPFS CID of the provenance record.
        actor_id: Optional identifier of the actor who triggered the run.

    Returns:
        The stored row as a dict, with ``created_at`` as an ISO-8601 string.
    """
    upsert_sql = """
            INSERT INTO run_cache (run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id)
            VALUES ($1, $2, $3, $4, $5, $6, $7)
            ON CONFLICT (run_id) DO UPDATE SET
                output_hash = EXCLUDED.output_hash,
                ipfs_cid = COALESCE(EXCLUDED.ipfs_cid, run_cache.ipfs_cid),
                provenance_cid = COALESCE(EXCLUDED.provenance_cid, run_cache.provenance_cid)
            RETURNING run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
            """
    async with pool.acquire() as conn:
        saved = await conn.fetchrow(
            upsert_sql,
            run_id,
            output_hash,
            ipfs_cid,
            provenance_cid,
            recipe,
            _json.dumps(inputs),  # asyncpg expects JSONB params as a JSON string here
            actor_id,
        )

    payload = {col: saved[col] for col in (
        "run_id", "output_hash", "ipfs_cid", "provenance_cid",
        "recipe", "inputs", "actor_id",
    )}
    timestamp = saved["created_at"]
    payload["created_at"] = timestamp.isoformat() if timestamp else None
    return payload
|
||||
|
||||
|
||||
async def get_run_by_output(output_hash: str) -> Optional[dict]:
    """Look up a run_cache entry by the hash of its output.

    Args:
        output_hash: Hash of a run's output (indexed via idx_run_cache_output).

    Returns:
        A plain dict of the matching run_cache row (with ``created_at``
        serialized to an ISO-8601 string), or ``None`` if no run produced
        an output with this hash.
    """
    query = """
            SELECT run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
            FROM run_cache WHERE output_hash = $1
            """
    async with pool.acquire() as conn:
        record = await conn.fetchrow(query, output_hash)

    if record is None:
        return None

    # Copy the scalar columns verbatim; only created_at needs conversion.
    result = {col: record[col] for col in (
        "run_id", "output_hash", "ipfs_cid", "provenance_cid",
        "recipe", "inputs", "actor_id",
    )}
    timestamp = record["created_at"]
    result["created_at"] = timestamp.isoformat() if timestamp else None
    return result
|
||||
|
||||
Reference in New Issue
Block a user