Rename content_hash/output_hash to cid throughout
Refactor to use the IPFS CID as the primary content identifier:

- Update database schema: content_hash -> cid, output_hash -> output_cid
- Update all services, routers, and tasks to use cid terminology
- Update HTML templates to display CID instead of hash
- Update cache_manager parameter names
- Update README documentation

This completes the transition to CID-only content addressing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -120,21 +120,21 @@ class SourceExecutor(Executor):
|
||||
"""Executor for SOURCE nodes - loads content from cache by hash."""
|
||||
|
||||
def execute(self, config: Dict, inputs: List[Path], output_path: Path) -> Path:
|
||||
# Source nodes load from cache by content_hash
|
||||
content_hash = config.get("content_hash")
|
||||
if not content_hash:
|
||||
raise ValueError("SOURCE node requires content_hash in config")
|
||||
# Source nodes load from cache by cid
|
||||
cid = config.get("cid")
|
||||
if not cid:
|
||||
raise ValueError("SOURCE node requires cid in config")
|
||||
|
||||
# Look up in cache
|
||||
source_path = CACHE_DIR / content_hash
|
||||
source_path = CACHE_DIR / cid
|
||||
if not source_path.exists():
|
||||
# Try nodes directory
|
||||
from cache_manager import get_cache_manager
|
||||
cache_manager = get_cache_manager()
|
||||
source_path = cache_manager.get_by_content_hash(content_hash)
|
||||
source_path = cache_manager.get_by_cid(cid)
|
||||
|
||||
if not source_path or not source_path.exists():
|
||||
raise ValueError(f"Source content not in cache: {content_hash}")
|
||||
raise ValueError(f"Source content not in cache: {cid}")
|
||||
|
||||
# For source nodes, we just return the path (no transformation)
|
||||
# The engine will use this as input to subsequent nodes
|
||||
@@ -186,7 +186,7 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
|
||||
|
||||
# Input comes from cache by hash (supports both legacy and new cache locations)
|
||||
cache_manager = get_cache_manager()
|
||||
input_path = cache_manager.get_by_content_hash(input_hash)
|
||||
input_path = cache_manager.get_by_cid(input_hash)
|
||||
if not input_path or not input_path.exists():
|
||||
raise ValueError(f"Input not in cache: {input_hash}")
|
||||
|
||||
@@ -214,9 +214,9 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
|
||||
raise ValueError(f"Unknown effect: {effect_name}")
|
||||
|
||||
# Verify output
|
||||
output_hash = file_hash(result)
|
||||
if output_hash != expected_hash:
|
||||
raise ValueError(f"Output hash mismatch: expected {expected_hash}, got {output_hash}")
|
||||
output_cid = file_hash(result)
|
||||
if output_cid != expected_hash:
|
||||
raise ValueError(f"Output hash mismatch: expected {expected_hash}, got {output_cid}")
|
||||
|
||||
# Build effect info based on source
|
||||
if effect_name == "identity":
|
||||
@@ -224,7 +224,7 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
|
||||
artdag_commit = get_artdag_commit()
|
||||
effect_info = {
|
||||
"name": f"effect:{effect_name}",
|
||||
"content_hash": REGISTRY[f"effect:{effect_name}"]["hash"],
|
||||
"cid": REGISTRY[f"effect:{effect_name}"]["hash"],
|
||||
"repo": "github",
|
||||
"repo_commit": artdag_commit,
|
||||
"repo_url": f"https://github.com/gilesbradshaw/art-dag/blob/{artdag_commit}/artdag/nodes/effect.py"
|
||||
@@ -234,7 +234,7 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
|
||||
effects_commit = get_effects_commit()
|
||||
effect_info = {
|
||||
"name": f"effect:{effect_name}",
|
||||
"content_hash": REGISTRY[f"effect:{effect_name}"]["hash"],
|
||||
"cid": REGISTRY[f"effect:{effect_name}"]["hash"],
|
||||
"repo": "rose-ash",
|
||||
"repo_commit": effects_commit,
|
||||
"repo_url": f"https://git.rose-ash.com/art-dag/effects/src/commit/{effects_commit}/{effect_name}"
|
||||
@@ -247,15 +247,15 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
|
||||
"rendered_by": "@giles@artdag.rose-ash.com",
|
||||
"output": {
|
||||
"name": output_name,
|
||||
"content_hash": output_hash,
|
||||
"cid": output_cid,
|
||||
},
|
||||
"inputs": [
|
||||
{"content_hash": input_hash}
|
||||
{"cid": input_hash}
|
||||
],
|
||||
"effects": [effect_info],
|
||||
"infrastructure": {
|
||||
"software": {"name": "infra:artdag", "content_hash": REGISTRY["infra:artdag"]["hash"]},
|
||||
"hardware": {"name": "infra:giles-hp", "content_hash": REGISTRY["infra:giles-hp"]["hash"]}
|
||||
"software": {"name": "infra:artdag", "cid": REGISTRY["infra:artdag"]["hash"]},
|
||||
"hardware": {"name": "infra:giles-hp", "cid": REGISTRY["infra:giles-hp"]["hash"]}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -329,10 +329,10 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
|
||||
if not result.success:
|
||||
raise RuntimeError(f"DAG execution failed: {result.error}")
|
||||
|
||||
# Index all node outputs by content_hash and upload to IPFS
|
||||
# Index all node outputs by cid and upload to IPFS
|
||||
cache_manager = get_cache_manager()
|
||||
output_hash = None
|
||||
node_hashes = {} # node_id -> content_hash mapping
|
||||
output_cid = None
|
||||
node_hashes = {} # node_id -> cid mapping
|
||||
node_ipfs_cids = {} # node_id -> ipfs_cid mapping
|
||||
|
||||
# Process all node results (intermediates + output)
|
||||
@@ -341,9 +341,9 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
|
||||
node = dag.nodes.get(node_id)
|
||||
# Skip SOURCE nodes - they're already in cache
|
||||
if node and (node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE"):
|
||||
content_hash = node.config.get("content_hash")
|
||||
if content_hash:
|
||||
node_hashes[node_id] = content_hash
|
||||
cid = node.config.get("cid")
|
||||
if cid:
|
||||
node_hashes[node_id] = cid
|
||||
continue
|
||||
|
||||
# Determine node type for cache metadata
|
||||
@@ -353,20 +353,20 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
|
||||
else:
|
||||
cache_node_type = "dag_intermediate"
|
||||
|
||||
# Store in cache_manager (indexes by content_hash, uploads to IPFS)
|
||||
# Store in cache_manager (indexes by cid, uploads to IPFS)
|
||||
cached, ipfs_cid = cache_manager.put(
|
||||
Path(node_path),
|
||||
node_type=cache_node_type,
|
||||
node_id=node_id,
|
||||
)
|
||||
node_hashes[node_id] = cached.content_hash
|
||||
node_hashes[node_id] = cached.cid
|
||||
if ipfs_cid:
|
||||
node_ipfs_cids[node_id] = ipfs_cid
|
||||
logger.info(f"Cached node {node_id}: {cached.content_hash[:16]}... -> {ipfs_cid or 'no IPFS'}")
|
||||
logger.info(f"Cached node {node_id}: {cached.cid[:16]}... -> {ipfs_cid or 'no IPFS'}")
|
||||
|
||||
# Get output hash from the output node
|
||||
if result.output_path and result.output_path.exists():
|
||||
output_hash = file_hash(result.output_path)
|
||||
output_cid = file_hash(result.output_path)
|
||||
output_ipfs_cid = node_ipfs_cids.get(dag.output_id)
|
||||
|
||||
# Store output in database (for L2 to query IPFS CID)
|
||||
@@ -376,14 +376,14 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
|
||||
async def save_to_db():
|
||||
if database.pool is None:
|
||||
await database.init_db()
|
||||
await database.create_cache_item(output_hash, output_ipfs_cid)
|
||||
await database.create_cache_item(output_cid, output_ipfs_cid)
|
||||
# Also save the run result
|
||||
if run_id:
|
||||
input_hashes_for_db = [
|
||||
node.config.get("content_hash")
|
||||
node.config.get("cid")
|
||||
for node in dag.nodes.values()
|
||||
if (node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE")
|
||||
and node.config.get("content_hash")
|
||||
and node.config.get("cid")
|
||||
]
|
||||
# Get actor_id and recipe from pending_runs (saved when run started)
|
||||
actor_id = None
|
||||
@@ -395,7 +395,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
|
||||
|
||||
await database.save_run_cache(
|
||||
run_id=run_id,
|
||||
output_hash=output_hash,
|
||||
output_cid=output_cid,
|
||||
recipe=recipe_name,
|
||||
inputs=input_hashes_for_db,
|
||||
ipfs_cid=output_ipfs_cid,
|
||||
@@ -405,7 +405,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
|
||||
# Save output as media for the user
|
||||
if actor_id:
|
||||
await database.save_item_metadata(
|
||||
content_hash=output_hash,
|
||||
cid=output_cid,
|
||||
actor_id=actor_id,
|
||||
item_type="media",
|
||||
description=f"Output from recipe: {recipe_name}",
|
||||
@@ -431,9 +431,9 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
|
||||
intermediate_hashes = []
|
||||
for node_id, node in dag.nodes.items():
|
||||
if node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE":
|
||||
content_hash = node.config.get("content_hash")
|
||||
if content_hash:
|
||||
input_hashes.append(content_hash)
|
||||
cid = node.config.get("cid")
|
||||
if cid:
|
||||
input_hashes.append(cid)
|
||||
elif node_id != dag.output_id and node_id in node_hashes:
|
||||
intermediate_hashes.append(node_hashes[node_id])
|
||||
|
||||
@@ -441,9 +441,9 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
|
||||
from artdag.activities import Activity
|
||||
from datetime import datetime, timezone
|
||||
activity = Activity(
|
||||
activity_id=run_id or f"dag-{output_hash[:16]}",
|
||||
activity_id=run_id or f"dag-{output_cid[:16]}",
|
||||
input_ids=sorted(input_hashes),
|
||||
output_id=output_hash,
|
||||
output_id=output_cid,
|
||||
intermediate_ids=intermediate_hashes,
|
||||
created_at=datetime.now(timezone.utc).timestamp(),
|
||||
status="completed",
|
||||
@@ -454,23 +454,23 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
|
||||
input_hashes_for_provenance = []
|
||||
for node_id, node in dag.nodes.items():
|
||||
if node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE":
|
||||
content_hash = node.config.get("content_hash")
|
||||
if content_hash:
|
||||
input_hashes_for_provenance.append({"content_hash": content_hash})
|
||||
cid = node.config.get("cid")
|
||||
if cid:
|
||||
input_hashes_for_provenance.append({"cid": cid})
|
||||
|
||||
provenance = {
|
||||
"task_id": self.request.id,
|
||||
"run_id": run_id,
|
||||
"rendered_at": datetime.now(timezone.utc).isoformat(),
|
||||
"output": {
|
||||
"content_hash": output_hash,
|
||||
"cid": output_cid,
|
||||
"ipfs_cid": node_ipfs_cids.get(dag.output_id) if dag.output_id else None,
|
||||
},
|
||||
"inputs": input_hashes_for_provenance,
|
||||
"dag": dag_json, # Full DAG definition
|
||||
"nodes": {
|
||||
node_id: {
|
||||
"content_hash": node_hashes.get(node_id),
|
||||
"cid": node_hashes.get(node_id),
|
||||
"ipfs_cid": node_ipfs_cids.get(node_id),
|
||||
}
|
||||
for node_id in dag.nodes.keys()
|
||||
@@ -496,7 +496,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
|
||||
return {
|
||||
"success": True,
|
||||
"run_id": run_id,
|
||||
"output_hash": output_hash,
|
||||
"output_cid": output_cid,
|
||||
"output_ipfs_cid": node_ipfs_cids.get(dag.output_id) if dag.output_id else None,
|
||||
"output_path": str(result.output_path) if result.output_path else None,
|
||||
"execution_time": result.execution_time,
|
||||
@@ -505,7 +505,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
|
||||
"node_results": {
|
||||
node_id: str(path) for node_id, path in result.node_results.items()
|
||||
},
|
||||
"node_hashes": node_hashes, # node_id -> content_hash
|
||||
"node_hashes": node_hashes, # node_id -> cid
|
||||
"node_ipfs_cids": node_ipfs_cids, # node_id -> ipfs_cid
|
||||
"provenance_cid": provenance_cid,
|
||||
}
|
||||
@@ -526,10 +526,10 @@ def build_effect_dag(input_hashes: List[str], effect_name: str) -> DAG:
|
||||
|
||||
# Add source nodes for each input
|
||||
source_ids = []
|
||||
for i, content_hash in enumerate(input_hashes):
|
||||
for i, cid in enumerate(input_hashes):
|
||||
source_node = Node(
|
||||
node_type=NodeType.SOURCE,
|
||||
config={"content_hash": content_hash},
|
||||
config={"cid": cid},
|
||||
name=f"source_{i}",
|
||||
)
|
||||
dag.add_node(source_node)
|
||||
|
||||
Reference in New Issue
Block a user