Rename content_hash/output_hash to cid/output_cid throughout

Refactor to use IPFS CID as the primary content identifier:
- Update database schema: content_hash -> cid, output_hash -> output_cid
- Update all services, routers, and tasks to use CID terminology
- Update HTML templates to display CID instead of hash
- Update cache_manager parameter names
- Update README documentation

This completes the transition to CID-only content addressing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-12 08:02:44 +00:00
parent 494a2a8650
commit 92d26b2b72
22 changed files with 981 additions and 988 deletions

View File

@@ -120,21 +120,21 @@ class SourceExecutor(Executor):
"""Executor for SOURCE nodes - loads content from cache by hash."""
def execute(self, config: Dict, inputs: List[Path], output_path: Path) -> Path:
# Source nodes load from cache by content_hash
content_hash = config.get("content_hash")
if not content_hash:
raise ValueError("SOURCE node requires content_hash in config")
# Source nodes load from cache by cid
cid = config.get("cid")
if not cid:
raise ValueError("SOURCE node requires cid in config")
# Look up in cache
source_path = CACHE_DIR / content_hash
source_path = CACHE_DIR / cid
if not source_path.exists():
# Try nodes directory
from cache_manager import get_cache_manager
cache_manager = get_cache_manager()
source_path = cache_manager.get_by_content_hash(content_hash)
source_path = cache_manager.get_by_cid(cid)
if not source_path or not source_path.exists():
raise ValueError(f"Source content not in cache: {content_hash}")
raise ValueError(f"Source content not in cache: {cid}")
# For source nodes, we just return the path (no transformation)
# The engine will use this as input to subsequent nodes
@@ -186,7 +186,7 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
# Input comes from cache by hash (supports both legacy and new cache locations)
cache_manager = get_cache_manager()
input_path = cache_manager.get_by_content_hash(input_hash)
input_path = cache_manager.get_by_cid(input_hash)
if not input_path or not input_path.exists():
raise ValueError(f"Input not in cache: {input_hash}")
@@ -214,9 +214,9 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
raise ValueError(f"Unknown effect: {effect_name}")
# Verify output
output_hash = file_hash(result)
if output_hash != expected_hash:
raise ValueError(f"Output hash mismatch: expected {expected_hash}, got {output_hash}")
output_cid = file_hash(result)
if output_cid != expected_hash:
raise ValueError(f"Output hash mismatch: expected {expected_hash}, got {output_cid}")
# Build effect info based on source
if effect_name == "identity":
@@ -224,7 +224,7 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
artdag_commit = get_artdag_commit()
effect_info = {
"name": f"effect:{effect_name}",
"content_hash": REGISTRY[f"effect:{effect_name}"]["hash"],
"cid": REGISTRY[f"effect:{effect_name}"]["hash"],
"repo": "github",
"repo_commit": artdag_commit,
"repo_url": f"https://github.com/gilesbradshaw/art-dag/blob/{artdag_commit}/artdag/nodes/effect.py"
@@ -234,7 +234,7 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
effects_commit = get_effects_commit()
effect_info = {
"name": f"effect:{effect_name}",
"content_hash": REGISTRY[f"effect:{effect_name}"]["hash"],
"cid": REGISTRY[f"effect:{effect_name}"]["hash"],
"repo": "rose-ash",
"repo_commit": effects_commit,
"repo_url": f"https://git.rose-ash.com/art-dag/effects/src/commit/{effects_commit}/{effect_name}"
@@ -247,15 +247,15 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
"rendered_by": "@giles@artdag.rose-ash.com",
"output": {
"name": output_name,
"content_hash": output_hash,
"cid": output_cid,
},
"inputs": [
{"content_hash": input_hash}
{"cid": input_hash}
],
"effects": [effect_info],
"infrastructure": {
"software": {"name": "infra:artdag", "content_hash": REGISTRY["infra:artdag"]["hash"]},
"hardware": {"name": "infra:giles-hp", "content_hash": REGISTRY["infra:giles-hp"]["hash"]}
"software": {"name": "infra:artdag", "cid": REGISTRY["infra:artdag"]["hash"]},
"hardware": {"name": "infra:giles-hp", "cid": REGISTRY["infra:giles-hp"]["hash"]}
}
}
@@ -329,10 +329,10 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
if not result.success:
raise RuntimeError(f"DAG execution failed: {result.error}")
# Index all node outputs by content_hash and upload to IPFS
# Index all node outputs by cid and upload to IPFS
cache_manager = get_cache_manager()
output_hash = None
node_hashes = {} # node_id -> content_hash mapping
output_cid = None
node_hashes = {} # node_id -> cid mapping
node_ipfs_cids = {} # node_id -> ipfs_cid mapping
# Process all node results (intermediates + output)
@@ -341,9 +341,9 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
node = dag.nodes.get(node_id)
# Skip SOURCE nodes - they're already in cache
if node and (node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE"):
content_hash = node.config.get("content_hash")
if content_hash:
node_hashes[node_id] = content_hash
cid = node.config.get("cid")
if cid:
node_hashes[node_id] = cid
continue
# Determine node type for cache metadata
@@ -353,20 +353,20 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
else:
cache_node_type = "dag_intermediate"
# Store in cache_manager (indexes by content_hash, uploads to IPFS)
# Store in cache_manager (indexes by cid, uploads to IPFS)
cached, ipfs_cid = cache_manager.put(
Path(node_path),
node_type=cache_node_type,
node_id=node_id,
)
node_hashes[node_id] = cached.content_hash
node_hashes[node_id] = cached.cid
if ipfs_cid:
node_ipfs_cids[node_id] = ipfs_cid
logger.info(f"Cached node {node_id}: {cached.content_hash[:16]}... -> {ipfs_cid or 'no IPFS'}")
logger.info(f"Cached node {node_id}: {cached.cid[:16]}... -> {ipfs_cid or 'no IPFS'}")
# Get output hash from the output node
if result.output_path and result.output_path.exists():
output_hash = file_hash(result.output_path)
output_cid = file_hash(result.output_path)
output_ipfs_cid = node_ipfs_cids.get(dag.output_id)
# Store output in database (for L2 to query IPFS CID)
@@ -376,14 +376,14 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
async def save_to_db():
if database.pool is None:
await database.init_db()
await database.create_cache_item(output_hash, output_ipfs_cid)
await database.create_cache_item(output_cid, output_ipfs_cid)
# Also save the run result
if run_id:
input_hashes_for_db = [
node.config.get("content_hash")
node.config.get("cid")
for node in dag.nodes.values()
if (node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE")
and node.config.get("content_hash")
and node.config.get("cid")
]
# Get actor_id and recipe from pending_runs (saved when run started)
actor_id = None
@@ -395,7 +395,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
await database.save_run_cache(
run_id=run_id,
output_hash=output_hash,
output_cid=output_cid,
recipe=recipe_name,
inputs=input_hashes_for_db,
ipfs_cid=output_ipfs_cid,
@@ -405,7 +405,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
# Save output as media for the user
if actor_id:
await database.save_item_metadata(
content_hash=output_hash,
cid=output_cid,
actor_id=actor_id,
item_type="media",
description=f"Output from recipe: {recipe_name}",
@@ -431,9 +431,9 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
intermediate_hashes = []
for node_id, node in dag.nodes.items():
if node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE":
content_hash = node.config.get("content_hash")
if content_hash:
input_hashes.append(content_hash)
cid = node.config.get("cid")
if cid:
input_hashes.append(cid)
elif node_id != dag.output_id and node_id in node_hashes:
intermediate_hashes.append(node_hashes[node_id])
@@ -441,9 +441,9 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
from artdag.activities import Activity
from datetime import datetime, timezone
activity = Activity(
activity_id=run_id or f"dag-{output_hash[:16]}",
activity_id=run_id or f"dag-{output_cid[:16]}",
input_ids=sorted(input_hashes),
output_id=output_hash,
output_id=output_cid,
intermediate_ids=intermediate_hashes,
created_at=datetime.now(timezone.utc).timestamp(),
status="completed",
@@ -454,23 +454,23 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
input_hashes_for_provenance = []
for node_id, node in dag.nodes.items():
if node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE":
content_hash = node.config.get("content_hash")
if content_hash:
input_hashes_for_provenance.append({"content_hash": content_hash})
cid = node.config.get("cid")
if cid:
input_hashes_for_provenance.append({"cid": cid})
provenance = {
"task_id": self.request.id,
"run_id": run_id,
"rendered_at": datetime.now(timezone.utc).isoformat(),
"output": {
"content_hash": output_hash,
"cid": output_cid,
"ipfs_cid": node_ipfs_cids.get(dag.output_id) if dag.output_id else None,
},
"inputs": input_hashes_for_provenance,
"dag": dag_json, # Full DAG definition
"nodes": {
node_id: {
"content_hash": node_hashes.get(node_id),
"cid": node_hashes.get(node_id),
"ipfs_cid": node_ipfs_cids.get(node_id),
}
for node_id in dag.nodes.keys()
@@ -496,7 +496,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
return {
"success": True,
"run_id": run_id,
"output_hash": output_hash,
"output_cid": output_cid,
"output_ipfs_cid": node_ipfs_cids.get(dag.output_id) if dag.output_id else None,
"output_path": str(result.output_path) if result.output_path else None,
"execution_time": result.execution_time,
@@ -505,7 +505,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
"node_results": {
node_id: str(path) for node_id, path in result.node_results.items()
},
"node_hashes": node_hashes, # node_id -> content_hash
"node_hashes": node_hashes, # node_id -> cid
"node_ipfs_cids": node_ipfs_cids, # node_id -> ipfs_cid
"provenance_cid": provenance_cid,
}
@@ -526,10 +526,10 @@ def build_effect_dag(input_hashes: List[str], effect_name: str) -> DAG:
# Add source nodes for each input
source_ids = []
for i, content_hash in enumerate(input_hashes):
for i, cid in enumerate(input_hashes):
source_node = Node(
node_type=NodeType.SOURCE,
config={"content_hash": content_hash},
config={"cid": cid},
name=f"source_{i}",
)
dag.add_node(source_node)