Rename content_hash/output_hash to cid/output_cid throughout

Refactor to use IPFS CID as the primary content identifier:
- Update database schema: content_hash -> cid, output_hash -> output_cid
- Update all services, routers, and tasks to use cid terminology
- Update HTML templates to display CID instead of hash
- Update cache_manager parameter names
- Update README documentation

This completes the transition to CID-only content addressing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-12 08:02:44 +00:00
parent 494a2a8650
commit 92d26b2b72
22 changed files with 981 additions and 988 deletions

View File

@@ -80,8 +80,8 @@ def run_plan(
cache_ids[step.step_id] = step.cache_id
# Also map input hashes
for name, content_hash in plan.input_hashes.items():
cache_ids[name] = content_hash
for name, cid in plan.input_hashes.items():
cache_ids[name] = cid
# Group steps by level
steps_by_level = plan.get_steps_by_level()
@@ -103,7 +103,7 @@ def run_plan(
for step in level_steps:
# Check if cached
cached_path = cache_mgr.get_by_content_hash(step.cache_id)
cached_path = cache_mgr.get_by_cid(step.cache_id)
if cached_path:
results_by_step[step.step_id] = {
"status": "cached",
@@ -171,7 +171,7 @@ def run_plan(
output_name = plan.output_name
if output_cache_id:
output_path = cache_mgr.get_by_content_hash(output_cache_id)
output_path = cache_mgr.get_by_cid(output_cache_id)
output_ipfs_cid = cache_mgr.get_ipfs_cid(output_cache_id)
# Build list of all outputs with their names and artifacts
@@ -183,7 +183,7 @@ def run_plan(
# If no outputs in result, build from step definition
if not step_outputs and step.outputs:
for output_def in step.outputs:
output_cache_path = cache_mgr.get_by_content_hash(output_def.cache_id)
output_cache_path = cache_mgr.get_by_cid(output_def.cache_id)
output_ipfs = cache_mgr.get_ipfs_cid(output_def.cache_id) if output_cache_path else None
all_outputs.append({
"name": output_def.name,
@@ -318,28 +318,28 @@ def run_recipe(
node_id = analysis_node["node_id"]
# Resolve input reference to its CID
content_hash = input_hashes.get(input_ref)
if not content_hash:
cid = input_hashes.get(input_ref)
if not cid:
logger.warning(f"Analysis node {node_id}: input '{input_ref}' not in input_hashes")
continue
path = cache_mgr.get_by_content_hash(content_hash)
path = cache_mgr.get_by_cid(cid)
if not path:
logger.warning(f"Analysis node {node_id}: content {content_hash[:16]}... not in cache")
logger.warning(f"Analysis node {node_id}: content {cid[:16]}... not in cache")
continue
try:
# Run analysis for the specific feature
features = [feature] if feature else ["beats", "energy"]
result = analyzer.analyze(
input_hash=content_hash,
input_hash=cid,
features=features,
input_path=Path(path),
)
# Store result keyed by node_id so plan can reference it
analysis_results[node_id] = result
# Also store by content_hash for compatibility
analysis_results[content_hash] = result
# Also store by cid for compatibility
analysis_results[cid] = result
logger.info(f"Analysis {node_id}: feature={feature}, tempo={result.tempo}")
except Exception as e:
logger.warning(f"Analysis failed for {node_id}: {e}")
@@ -380,7 +380,7 @@ def run_recipe(
# Store in cache (content-addressed, auto-pins to IPFS)
# Plan is just another node output - no special treatment needed
cached, plan_ipfs_cid = cache_mgr.put(tmp_path, node_type="plan", move=True)
logger.info(f"Plan cached: hash={cached.content_hash}, ipfs={plan_ipfs_cid}")
logger.info(f"Plan cached: hash={cached.cid}, ipfs={plan_ipfs_cid}")
# Phase 4: Execute
logger.info("Phase 4: Executing plan...")
@@ -392,7 +392,7 @@ def run_recipe(
"run_id": run_id,
"recipe": compiled.name,
"plan_id": plan.plan_id,
"plan_cache_id": cached.content_hash,
"plan_cache_id": cached.cid,
"plan_ipfs_cid": plan_ipfs_cid,
"output_path": result.get("output_path"),
"output_cache_id": result.get("output_cache_id"),
@@ -454,21 +454,21 @@ def generate_plan(
feature = analysis_node["feature"]
node_id = analysis_node["node_id"]
content_hash = input_hashes.get(input_ref)
if not content_hash:
cid = input_hashes.get(input_ref)
if not cid:
continue
path = cache_mgr.get_by_content_hash(content_hash)
path = cache_mgr.get_by_cid(cid)
if path:
try:
features = [feature] if feature else ["beats", "energy"]
result = analyzer.analyze(
input_hash=content_hash,
input_hash=cid,
features=features,
input_path=Path(path),
)
analysis_results[node_id] = result
analysis_results[content_hash] = result
analysis_results[cid] = result
except Exception as e:
logger.warning(f"Analysis failed for {node_id}: {e}")