Add content-addressable runs - runs identified by hash of inputs + recipe

- Add run_cache table for fast run_id -> output lookup
- compute_run_id() computes deterministic run_id from inputs + recipe
- create_run checks the L1 cache, then the L2 cache, before dispatching a Celery task
- If output exists on L2 but not L1, pulls from IPFS
- Saves run results to cache on completion

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-09 11:05:18 +00:00
parent 5344fe263f
commit e4b9657d1b
2 changed files with 235 additions and 3 deletions

View File

@@ -71,6 +71,19 @@ CREATE TABLE IF NOT EXISTS l2_shares (
UNIQUE(content_hash, actor_id, l2_server, content_type)
);
-- Run cache: maps content-addressable run_id to output
-- run_id is a hash of (sorted inputs + recipe), making runs deterministic
CREATE TABLE IF NOT EXISTS run_cache (
run_id VARCHAR(64) PRIMARY KEY, -- deterministic hash of (sorted inputs + recipe)
output_hash VARCHAR(64) NOT NULL, -- content hash of the run's output
ipfs_cid VARCHAR(128), -- IPFS CID of the output, when available (nullable)
provenance_cid VARCHAR(128), -- IPFS CID of the provenance record, when available (nullable)
recipe VARCHAR(255) NOT NULL, -- recipe identifier used to produce the output
inputs JSONB NOT NULL, -- JSON array of input hashes (serialized from a list of strings)
actor_id VARCHAR(255), -- actor who first ran this computation (nullable)
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_item_types_content_hash ON item_types(content_hash);
CREATE INDEX IF NOT EXISTS idx_item_types_actor_id ON item_types(actor_id);
@@ -79,6 +92,7 @@ CREATE INDEX IF NOT EXISTS idx_item_types_path ON item_types(path);
CREATE INDEX IF NOT EXISTS idx_pin_reasons_item_type ON pin_reasons(item_type_id);
CREATE INDEX IF NOT EXISTS idx_l2_shares_content_hash ON l2_shares(content_hash);
CREATE INDEX IF NOT EXISTS idx_l2_shares_actor_id ON l2_shares(actor_id);
CREATE INDEX IF NOT EXISTS idx_run_cache_output ON run_cache(output_hash);
"""
@@ -988,3 +1002,88 @@ async def count_user_items(actor_id: str, item_type: Optional[str] = None) -> in
"SELECT COUNT(DISTINCT content_hash) FROM item_types WHERE actor_id = $1",
actor_id
)
# ============ Run Cache ============
async def get_run_cache(run_id: str) -> Optional[dict]:
    """Look up a cached run result by its content-addressable run_id.

    Returns a dict with the cached row's fields (``created_at`` rendered as
    an ISO-8601 string), or None when no cache entry exists for run_id.
    """
    query = (
        "SELECT run_id, output_hash, ipfs_cid, provenance_cid, "
        "recipe, inputs, actor_id, created_at "
        "FROM run_cache WHERE run_id = $1"
    )
    async with pool.acquire() as conn:
        record = await conn.fetchrow(query, run_id)
    if record is None:
        return None
    # Copy the scalar columns verbatim; timestamps are serialized below.
    entry = {
        key: record[key]
        for key in (
            "run_id",
            "output_hash",
            "ipfs_cid",
            "provenance_cid",
            "recipe",
            "inputs",
            "actor_id",
        )
    }
    ts = record["created_at"]
    entry["created_at"] = ts.isoformat() if ts else None
    return entry
async def save_run_cache(
    run_id: str,
    output_hash: str,
    recipe: str,
    inputs: List[str],
    ipfs_cid: Optional[str] = None,
    provenance_cid: Optional[str] = None,
    actor_id: Optional[str] = None,
) -> dict:
    """Persist a run result under its content-addressable run_id.

    Upserts: on a run_id collision the output_hash is refreshed, while
    ipfs_cid/provenance_cid are only overwritten by non-NULL values
    (COALESCE keeps the previously stored CIDs). inputs and actor_id are
    never rewritten on conflict. Returns the stored row as a dict with
    ``created_at`` rendered as an ISO-8601 string.
    """
    sql = """
        INSERT INTO run_cache (run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id)
        VALUES ($1, $2, $3, $4, $5, $6, $7)
        ON CONFLICT (run_id) DO UPDATE SET
            output_hash = EXCLUDED.output_hash,
            ipfs_cid = COALESCE(EXCLUDED.ipfs_cid, run_cache.ipfs_cid),
            provenance_cid = COALESCE(EXCLUDED.provenance_cid, run_cache.provenance_cid)
        RETURNING run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
    """
    async with pool.acquire() as conn:
        record = await conn.fetchrow(
            sql,
            run_id,
            output_hash,
            ipfs_cid,
            provenance_cid,
            recipe,
            _json.dumps(inputs),  # JSONB column expects serialized JSON text
            actor_id,
        )
    entry = {
        key: record[key]
        for key in (
            "run_id",
            "output_hash",
            "ipfs_cid",
            "provenance_cid",
            "recipe",
            "inputs",
            "actor_id",
        )
    }
    ts = record["created_at"]
    entry["created_at"] = ts.isoformat() if ts else None
    return entry
async def get_run_by_output(output_hash: str) -> Optional[dict]:
    """Reverse lookup: find a run-cache entry that produced output_hash.

    Returns the first matching row as a dict (``created_at`` rendered as an
    ISO-8601 string), or None when no cached run produced this output.
    """
    query = (
        "SELECT run_id, output_hash, ipfs_cid, provenance_cid, "
        "recipe, inputs, actor_id, created_at "
        "FROM run_cache WHERE output_hash = $1"
    )
    async with pool.acquire() as conn:
        record = await conn.fetchrow(query, output_hash)
    if record is None:
        return None
    entry = {
        key: record[key]
        for key in (
            "run_id",
            "output_hash",
            "ipfs_cid",
            "provenance_cid",
            "recipe",
            "inputs",
            "actor_id",
        )
    }
    ts = record["created_at"]
    entry["created_at"] = ts.isoformat() if ts else None
    return entry