Rename content_hash/output_hash to cid throughout

Refactor to use IPFS CID as the primary content identifier:

- Update database schema: content_hash -> cid, output_hash -> output_cid
- Update all services, routers, and tasks to use cid terminology
- Update HTML templates to display CID instead of hash
- Update cache_manager parameter names
- Update README documentation

This completes the transition to CID-only content addressing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 08:02:44 +00:00
parent 494a2a8650
commit 92d26b2b72
22 changed files with 981 additions and 988 deletions
--- a/tasks/orchestrate.py
+++ b/tasks/orchestrate.py
@@ -80,8 +80,8 @@ def run_plan(
        cache_ids[step.step_id] = step.cache_id

    # Also map input hashes
-    for name, content_hash in plan.input_hashes.items():
-        cache_ids[name] = content_hash
+    for name, cid in plan.input_hashes.items():
+        cache_ids[name] = cid

    # Group steps by level
    steps_by_level = plan.get_steps_by_level()
@@ -103,7 +103,7 @@ def run_plan(

        for step in level_steps:
            # Check if cached
-            cached_path = cache_mgr.get_by_content_hash(step.cache_id)
+            cached_path = cache_mgr.get_by_cid(step.cache_id)
            if cached_path:
                results_by_step[step.step_id] = {
                    "status": "cached",
@@ -171,7 +171,7 @@ def run_plan(
    output_name = plan.output_name

    if output_cache_id:
-        output_path = cache_mgr.get_by_content_hash(output_cache_id)
+        output_path = cache_mgr.get_by_cid(output_cache_id)
        output_ipfs_cid = cache_mgr.get_ipfs_cid(output_cache_id)

    # Build list of all outputs with their names and artifacts
@@ -183,7 +183,7 @@ def run_plan(
        # If no outputs in result, build from step definition
        if not step_outputs and step.outputs:
            for output_def in step.outputs:
-                output_cache_path = cache_mgr.get_by_content_hash(output_def.cache_id)
+                output_cache_path = cache_mgr.get_by_cid(output_def.cache_id)
                output_ipfs = cache_mgr.get_ipfs_cid(output_def.cache_id) if output_cache_path else None
                all_outputs.append({
                    "name": output_def.name,
@@ -318,28 +318,28 @@ def run_recipe(
        node_id = analysis_node["node_id"]

        # Resolve input reference to content hash
-        content_hash = input_hashes.get(input_ref)
-        if not content_hash:
+        cid = input_hashes.get(input_ref)
+        if not cid:
            logger.warning(f"Analysis node {node_id}: input '{input_ref}' not in input_hashes")
            continue

-        path = cache_mgr.get_by_content_hash(content_hash)
+        path = cache_mgr.get_by_cid(cid)
        if not path:
-            logger.warning(f"Analysis node {node_id}: content {content_hash[:16]}... not in cache")
+            logger.warning(f"Analysis node {node_id}: content {cid[:16]}... not in cache")
            continue

        try:
            # Run analysis for the specific feature
            features = [feature] if feature else ["beats", "energy"]
            result = analyzer.analyze(
-                input_hash=content_hash,
+                input_hash=cid,
                features=features,
                input_path=Path(path),
            )
            # Store result keyed by node_id so plan can reference it
            analysis_results[node_id] = result
-            # Also store by content_hash for compatibility
-            analysis_results[content_hash] = result
+            # Also store by cid for compatibility
+            analysis_results[cid] = result
            logger.info(f"Analysis {node_id}: feature={feature}, tempo={result.tempo}")
        except Exception as e:
            logger.warning(f"Analysis failed for {node_id}: {e}")
@@ -380,7 +380,7 @@ def run_recipe(
    # Store in cache (content-addressed, auto-pins to IPFS)
    # Plan is just another node output - no special treatment needed
    cached, plan_ipfs_cid = cache_mgr.put(tmp_path, node_type="plan", move=True)
-    logger.info(f"Plan cached: hash={cached.content_hash}, ipfs={plan_ipfs_cid}")
+    logger.info(f"Plan cached: hash={cached.cid}, ipfs={plan_ipfs_cid}")

    # Phase 4: Execute
    logger.info("Phase 4: Executing plan...")
@@ -392,7 +392,7 @@ def run_recipe(
        "run_id": run_id,
        "recipe": compiled.name,
        "plan_id": plan.plan_id,
-        "plan_cache_id": cached.content_hash,
+        "plan_cache_id": cached.cid,
        "plan_ipfs_cid": plan_ipfs_cid,
        "output_path": result.get("output_path"),
        "output_cache_id": result.get("output_cache_id"),
@@ -454,21 +454,21 @@ def generate_plan(
        feature = analysis_node["feature"]
        node_id = analysis_node["node_id"]

-        content_hash = input_hashes.get(input_ref)
-        if not content_hash:
+        cid = input_hashes.get(input_ref)
+        if not cid:
            continue

-        path = cache_mgr.get_by_content_hash(content_hash)
+        path = cache_mgr.get_by_cid(cid)
        if path:
            try:
                features = [feature] if feature else ["beats", "energy"]
                result = analyzer.analyze(
-                    input_hash=content_hash,
+                    input_hash=cid,
                    features=features,
                    input_path=Path(path),
                )
                analysis_results[node_id] = result
-                analysis_results[content_hash] = result
+                analysis_results[cid] = result
            except Exception as e:
                logger.warning(f"Analysis failed for {node_id}: {e}")