Add content-addressable runs and activities

- compute_run_id() for deterministic run identification
- /assets/by-run-id/{run_id} endpoint for L1 recovery
- Store run_id in provenance when recording runs
- Activities now use content hash as ID instead of UUID
- URLs: /activities/{content_hash} instead of /activities/1

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-09 11:04:59 +00:00
parent 0f01d8e12c
commit 3ed4fe89ed
2 changed files with 66 additions and 7 deletions

22
db.py
View File

@@ -65,9 +65,9 @@ CREATE TABLE IF NOT EXISTS assets (
updated_at TIMESTAMPTZ
);
-- Activities table
-- Activities table (activity_id is a content-addressable hash: run_id for recorded runs, content_hash for registered/published assets)
CREATE TABLE IF NOT EXISTS activities (
activity_id UUID PRIMARY KEY,
activity_id VARCHAR(64) PRIMARY KEY,
activity_type VARCHAR(50) NOT NULL,
actor_id TEXT NOT NULL,
object_data JSONB NOT NULL,
@@ -83,8 +83,8 @@ CREATE TABLE IF NOT EXISTS anchors (
tree_ipfs_cid VARCHAR(128),
ots_proof_cid VARCHAR(128),
activity_count INTEGER NOT NULL,
first_activity_id UUID,
last_activity_id UUID,
first_activity_id VARCHAR(64),
last_activity_id VARCHAR(64),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
confirmed_at TIMESTAMPTZ,
bitcoin_txid VARCHAR(64)
@@ -243,6 +243,20 @@ async def get_asset_by_hash(content_hash: str) -> Optional[dict]:
return None
async def get_asset_by_run_id(run_id: str) -> Optional[dict]:
    """Look up the asset whose provenance JSON carries the given run_id.

    Returns the matching row passed through ``_parse_asset_row``, or
    ``None`` when no asset row has ``provenance->>'run_id'`` equal to
    *run_id*.
    """
    # The run_id is bound as a query parameter ($1), never interpolated.
    query = """SELECT name, content_hash, ipfs_cid, asset_type, tags, metadata, url,
provenance, description, origin, owner, created_at, updated_at
FROM assets WHERE provenance->>'run_id' = $1"""
    async with get_connection() as conn:
        record = await conn.fetchrow(query, run_id)
        if not record:
            return None
        return _parse_asset_row(record)
async def get_all_assets() -> dict[str, dict]:
"""Get all assets as a dict indexed by name."""
async with get_connection() as conn:

View File

@@ -66,6 +66,26 @@ COOKIE_DOMAIN = _get_cookie_domain()
# Create the data directory (with any missing parents) and its "assets"
# subdirectory; directories that already exist are left untouched.
DATA_DIR.mkdir(parents=True, exist_ok=True)
(DATA_DIR / "assets").mkdir(exist_ok=True)
def compute_run_id(input_hashes: list[str], recipe: str, recipe_hash: str = None) -> str:
    """
    Derive a deterministic, content-addressable identifier for a run.

    The identifier is the hex SHA3-256 digest of a canonical JSON document
    (sorted keys, no whitespace between tokens) holding:
    - the input content hashes, sorted
    - the recipe identifier: recipe_hash when given (and non-empty),
      otherwise "effect:{recipe}"
    - a schema version marker

    Identical inputs and recipe therefore always yield the same run_id,
    regardless of the order in which the inputs are supplied.

    Must match the L1 implementation exactly.
    """
    recipe_key = recipe_hash if recipe_hash else f"effect:{recipe}"
    payload = {
        "version": "1",  # Bump when the hashed schema changes
        "recipe": recipe_key,
        "inputs": sorted(input_hashes),
    }
    # sort_keys + compact separators give one canonical byte sequence, so
    # the insertion order of the dict above is irrelevant to the digest.
    canonical = json.dumps(payload, sort_keys=True, separators=(",", ":"))
    digest = hashlib.sha3_256(canonical.encode())
    return digest.hexdigest()
# Load README
README_PATH = Path(__file__).parent / "README.md"
README_CONTENT = ""
@@ -1614,6 +1634,27 @@ async def get_asset(name: str, request: Request):
return registry["assets"][name]
@app.get("/assets/by-run-id/{run_id}")
async def get_asset_by_run_id(run_id: str):
    """
    Resolve a content-addressable run_id to the asset recorded for it.

    Looks the asset up by the run_id stored in its provenance and returns
    the asset name, output hash, and IPFS CID (used for L1 recovery), plus
    the provenance CID when the asset has provenance data.

    Raises a 404 HTTPException when no asset is found for the run_id.
    """
    found = await db.get_asset_by_run_id(run_id)
    if not found:
        raise HTTPException(404, f"No asset found for run_id: {run_id}")

    # Missing or empty provenance yields a null provenance_cid.
    provenance = found.get("provenance")
    provenance_cid = provenance.get("provenance_cid") if provenance else None

    return {
        "run_id": run_id,
        "asset_name": found.get("name"),
        "output_hash": found.get("content_hash"),
        "ipfs_cid": found.get("ipfs_cid"),
        "provenance_cid": provenance_cid,
    }
@app.patch("/assets/{name}")
async def update_asset(name: str, req: UpdateAssetRequest, user: User = Depends(get_required_user)):
"""Update an existing asset's metadata. Creates an Update activity."""
@@ -1756,7 +1797,7 @@ async def _register_asset_impl(req: RegisterRequest, owner: str):
object_data["provenance"] = req.provenance
activity = {
"activity_id": str(uuid.uuid4()),
"activity_id": req.content_hash, # Content-addressable by content hash
"activity_type": "Create",
"actor_id": f"https://{DOMAIN}/users/{owner}",
"object_data": object_data,
@@ -1950,7 +1991,11 @@ async def record_run(req: RecordRunRequest, user: User = Depends(get_required_us
recipe_cid = ipfs_client.add_json(recipe_data)
# Build and store full provenance on IPFS
# Compute content-addressable run_id from inputs + recipe
recipe_name = recipe_data.get("name", "unknown") if isinstance(recipe_data, dict) else str(recipe_data)
run_id = compute_run_id(input_hashes, recipe_name)
provenance = {
"run_id": run_id, # Content-addressable run identifier
"inputs": registered_inputs,
"output": {
"content_hash": output_hash,
@@ -2036,7 +2081,7 @@ async def record_run(req: RecordRunRequest, user: User = Depends(get_required_us
}
activity = {
"activity_id": str(uuid.uuid4()),
"activity_id": provenance["run_id"], # Content-addressable run_id
"activity_type": "Create",
"actor_id": f"https://{DOMAIN}/users/{user.username}",
"object_data": object_data,
@@ -2157,7 +2202,7 @@ async def publish_cache(req: PublishCacheRequest, user: User = Depends(get_requi
}
activity = {
"activity_id": str(uuid.uuid4()),
"activity_id": req.content_hash, # Content-addressable by content hash
"activity_type": "Create",
"actor_id": f"https://{DOMAIN}/users/{user.username}",
"object_data": object_data,