Rename content_hash/output_hash to cid throughout
Refactor to use IPFS CID as the primary content identifier:

- Update database schema: content_hash -> cid, output_hash -> output_cid
- Update all services, routers, and tasks to use cid terminology
- Update HTML templates to display CID instead of hash
- Update cache_manager parameter names
- Update README documentation

This completes the transition to CID-only content addressing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -81,8 +81,8 @@ def execute_step(
|
||||
# Get L1 cache manager (IPFS-backed)
|
||||
cache_mgr = get_cache_manager()
|
||||
|
||||
# Check if already cached (by cache_id as content_hash)
|
||||
cached_path = cache_mgr.get_by_content_hash(step.cache_id)
|
||||
# Check if already cached (by cache_id as cid)
|
||||
cached_path = cache_mgr.get_by_cid(step.cache_id)
|
||||
if cached_path:
|
||||
logger.info(f"Step {step.step_id} already cached at {cached_path}")
|
||||
|
||||
@@ -141,14 +141,14 @@ def execute_step(
|
||||
try:
|
||||
# Handle SOURCE nodes
|
||||
if step.node_type == "SOURCE":
|
||||
content_hash = step.config.get("content_hash")
|
||||
if not content_hash:
|
||||
raise ValueError(f"SOURCE step missing content_hash")
|
||||
cid = step.config.get("cid")
|
||||
if not cid:
|
||||
raise ValueError(f"SOURCE step missing cid")
|
||||
|
||||
# Look up in cache
|
||||
path = cache_mgr.get_by_content_hash(content_hash)
|
||||
path = cache_mgr.get_by_cid(cid)
|
||||
if not path:
|
||||
raise ValueError(f"SOURCE input not found in cache: {content_hash[:16]}...")
|
||||
raise ValueError(f"SOURCE input not found in cache: {cid[:16]}...")
|
||||
|
||||
output_path = str(path)
|
||||
complete_task(step.cache_id, worker_id, output_path)
|
||||
@@ -165,7 +165,7 @@ def execute_step(
|
||||
for item_id in step.config.get("items", []):
|
||||
item_cache_id = input_cache_ids.get(item_id)
|
||||
if item_cache_id:
|
||||
path = cache_mgr.get_by_content_hash(item_cache_id)
|
||||
path = cache_mgr.get_by_cid(item_cache_id)
|
||||
if path:
|
||||
item_paths.append(str(path))
|
||||
|
||||
@@ -190,7 +190,7 @@ def execute_step(
|
||||
input_cache_id = input_cache_ids.get(input_step_id)
|
||||
if not input_cache_id:
|
||||
raise ValueError(f"No cache_id for input step: {input_step_id}")
|
||||
path = cache_mgr.get_by_content_hash(input_cache_id)
|
||||
path = cache_mgr.get_by_cid(input_cache_id)
|
||||
if not path:
|
||||
raise ValueError(f"Input not in cache: {input_cache_id[:16]}...")
|
||||
input_paths.append(Path(path))
|
||||
@@ -276,7 +276,7 @@ def execute_step(
|
||||
"step_id": step.step_id,
|
||||
"cache_id": step.cache_id,
|
||||
"output_path": str(cached_file.path),
|
||||
"content_hash": cached_file.content_hash,
|
||||
"cid": cached_file.cid,
|
||||
"ipfs_cid": ipfs_cid,
|
||||
"filter_count": len(filter_chain),
|
||||
}
|
||||
@@ -298,7 +298,7 @@ def execute_step(
|
||||
if not input_cache_id:
|
||||
raise ValueError(f"No cache_id for input step: {input_step_id}")
|
||||
|
||||
path = cache_mgr.get_by_content_hash(input_cache_id)
|
||||
path = cache_mgr.get_by_cid(input_cache_id)
|
||||
if not path:
|
||||
raise ValueError(f"Input not in cache: {input_cache_id[:16]}...")
|
||||
|
||||
@@ -336,7 +336,7 @@ def execute_step(
|
||||
"media_type": output_def.media_type,
|
||||
"index": output_def.index,
|
||||
"path": str(cached_file.path),
|
||||
"content_hash": cached_file.content_hash,
|
||||
"cid": cached_file.cid,
|
||||
"ipfs_cid": ipfs_cid,
|
||||
})
|
||||
else:
|
||||
@@ -347,7 +347,7 @@ def execute_step(
|
||||
"media_type": "video/mp4",
|
||||
"index": 0,
|
||||
"path": str(cached_file.path),
|
||||
"content_hash": cached_file.content_hash,
|
||||
"cid": cached_file.cid,
|
||||
"ipfs_cid": ipfs_cid,
|
||||
})
|
||||
|
||||
@@ -362,7 +362,7 @@ def execute_step(
|
||||
"name": step.name,
|
||||
"cache_id": step.cache_id,
|
||||
"output_path": str(cached_file.path),
|
||||
"content_hash": cached_file.content_hash,
|
||||
"cid": cached_file.cid,
|
||||
"ipfs_cid": ipfs_cid,
|
||||
"outputs": outputs,
|
||||
}
|
||||
|
||||
@@ -140,7 +140,7 @@ def execute_step_sexp(
|
||||
cache_mgr = get_cache_manager()
|
||||
|
||||
# Check if already cached
|
||||
cached_path = cache_mgr.get_by_content_hash(cache_id)
|
||||
cached_path = cache_mgr.get_by_cid(cache_id)
|
||||
if cached_path:
|
||||
logger.info(f"Step {step_id} already cached at {cached_path}")
|
||||
|
||||
@@ -202,7 +202,7 @@ def execute_step_sexp(
|
||||
if not content_id:
|
||||
raise ValueError("SOURCE step missing :cid or :hash")
|
||||
|
||||
path = cache_mgr.get_by_content_hash(content_id)
|
||||
path = cache_mgr.get_by_cid(content_id)
|
||||
if not path:
|
||||
raise ValueError(f"SOURCE input not found: {content_id[:16]}...")
|
||||
|
||||
@@ -226,7 +226,7 @@ def execute_step_sexp(
|
||||
input_paths = []
|
||||
for inp in inputs:
|
||||
inp_cache_id = input_cache_ids.get(inp, inp)
|
||||
path = cache_mgr.get_by_content_hash(inp_cache_id)
|
||||
path = cache_mgr.get_by_cid(inp_cache_id)
|
||||
if not path:
|
||||
raise ValueError(f"Input not found: {inp_cache_id[:16]}...")
|
||||
input_paths.append(Path(path))
|
||||
@@ -261,7 +261,7 @@ def execute_step_sexp(
|
||||
input_paths = []
|
||||
for inp in inputs:
|
||||
inp_cache_id = input_cache_ids.get(inp, inp)
|
||||
path = cache_mgr.get_by_content_hash(inp_cache_id)
|
||||
path = cache_mgr.get_by_cid(inp_cache_id)
|
||||
if not path:
|
||||
raise ValueError(f"Input not found: {inp_cache_id[:16]}...")
|
||||
input_paths.append(Path(path))
|
||||
@@ -366,7 +366,7 @@ def execute_step_sexp(
|
||||
"step_id": step_id,
|
||||
"cache_id": cache_id,
|
||||
"output_path": str(cached_file.path),
|
||||
"content_hash": cached_file.content_hash,
|
||||
"cid": cached_file.cid,
|
||||
"ipfs_cid": ipfs_cid,
|
||||
"filter_count": len(filter_chain),
|
||||
}
|
||||
@@ -386,7 +386,7 @@ def execute_step_sexp(
|
||||
input_paths = []
|
||||
for inp in inputs:
|
||||
inp_cache_id = input_cache_ids.get(inp, inp)
|
||||
path = cache_mgr.get_by_content_hash(inp_cache_id)
|
||||
path = cache_mgr.get_by_cid(inp_cache_id)
|
||||
if not path:
|
||||
raise ValueError(f"Input not found: {inp_cache_id[:16]}...")
|
||||
input_paths.append(Path(path))
|
||||
@@ -420,7 +420,7 @@ def execute_step_sexp(
|
||||
"step_id": step_id,
|
||||
"cache_id": cache_id,
|
||||
"output_path": str(cached_file.path),
|
||||
"content_hash": cached_file.content_hash,
|
||||
"cid": cached_file.cid,
|
||||
"ipfs_cid": ipfs_cid,
|
||||
}
|
||||
|
||||
|
||||
@@ -80,8 +80,8 @@ def run_plan(
|
||||
cache_ids[step.step_id] = step.cache_id
|
||||
|
||||
# Also map input hashes
|
||||
for name, content_hash in plan.input_hashes.items():
|
||||
cache_ids[name] = content_hash
|
||||
for name, cid in plan.input_hashes.items():
|
||||
cache_ids[name] = cid
|
||||
|
||||
# Group steps by level
|
||||
steps_by_level = plan.get_steps_by_level()
|
||||
@@ -103,7 +103,7 @@ def run_plan(
|
||||
|
||||
for step in level_steps:
|
||||
# Check if cached
|
||||
cached_path = cache_mgr.get_by_content_hash(step.cache_id)
|
||||
cached_path = cache_mgr.get_by_cid(step.cache_id)
|
||||
if cached_path:
|
||||
results_by_step[step.step_id] = {
|
||||
"status": "cached",
|
||||
@@ -171,7 +171,7 @@ def run_plan(
|
||||
output_name = plan.output_name
|
||||
|
||||
if output_cache_id:
|
||||
output_path = cache_mgr.get_by_content_hash(output_cache_id)
|
||||
output_path = cache_mgr.get_by_cid(output_cache_id)
|
||||
output_ipfs_cid = cache_mgr.get_ipfs_cid(output_cache_id)
|
||||
|
||||
# Build list of all outputs with their names and artifacts
|
||||
@@ -183,7 +183,7 @@ def run_plan(
|
||||
# If no outputs in result, build from step definition
|
||||
if not step_outputs and step.outputs:
|
||||
for output_def in step.outputs:
|
||||
output_cache_path = cache_mgr.get_by_content_hash(output_def.cache_id)
|
||||
output_cache_path = cache_mgr.get_by_cid(output_def.cache_id)
|
||||
output_ipfs = cache_mgr.get_ipfs_cid(output_def.cache_id) if output_cache_path else None
|
||||
all_outputs.append({
|
||||
"name": output_def.name,
|
||||
@@ -318,28 +318,28 @@ def run_recipe(
|
||||
node_id = analysis_node["node_id"]
|
||||
|
||||
# Resolve input reference to content hash
|
||||
content_hash = input_hashes.get(input_ref)
|
||||
if not content_hash:
|
||||
cid = input_hashes.get(input_ref)
|
||||
if not cid:
|
||||
logger.warning(f"Analysis node {node_id}: input '{input_ref}' not in input_hashes")
|
||||
continue
|
||||
|
||||
path = cache_mgr.get_by_content_hash(content_hash)
|
||||
path = cache_mgr.get_by_cid(cid)
|
||||
if not path:
|
||||
logger.warning(f"Analysis node {node_id}: content {content_hash[:16]}... not in cache")
|
||||
logger.warning(f"Analysis node {node_id}: content {cid[:16]}... not in cache")
|
||||
continue
|
||||
|
||||
try:
|
||||
# Run analysis for the specific feature
|
||||
features = [feature] if feature else ["beats", "energy"]
|
||||
result = analyzer.analyze(
|
||||
input_hash=content_hash,
|
||||
input_hash=cid,
|
||||
features=features,
|
||||
input_path=Path(path),
|
||||
)
|
||||
# Store result keyed by node_id so plan can reference it
|
||||
analysis_results[node_id] = result
|
||||
# Also store by content_hash for compatibility
|
||||
analysis_results[content_hash] = result
|
||||
# Also store by cid for compatibility
|
||||
analysis_results[cid] = result
|
||||
logger.info(f"Analysis {node_id}: feature={feature}, tempo={result.tempo}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Analysis failed for {node_id}: {e}")
|
||||
@@ -380,7 +380,7 @@ def run_recipe(
|
||||
# Store in cache (content-addressed, auto-pins to IPFS)
|
||||
# Plan is just another node output - no special treatment needed
|
||||
cached, plan_ipfs_cid = cache_mgr.put(tmp_path, node_type="plan", move=True)
|
||||
logger.info(f"Plan cached: hash={cached.content_hash}, ipfs={plan_ipfs_cid}")
|
||||
logger.info(f"Plan cached: hash={cached.cid}, ipfs={plan_ipfs_cid}")
|
||||
|
||||
# Phase 4: Execute
|
||||
logger.info("Phase 4: Executing plan...")
|
||||
@@ -392,7 +392,7 @@ def run_recipe(
|
||||
"run_id": run_id,
|
||||
"recipe": compiled.name,
|
||||
"plan_id": plan.plan_id,
|
||||
"plan_cache_id": cached.content_hash,
|
||||
"plan_cache_id": cached.cid,
|
||||
"plan_ipfs_cid": plan_ipfs_cid,
|
||||
"output_path": result.get("output_path"),
|
||||
"output_cache_id": result.get("output_cache_id"),
|
||||
@@ -454,21 +454,21 @@ def generate_plan(
|
||||
feature = analysis_node["feature"]
|
||||
node_id = analysis_node["node_id"]
|
||||
|
||||
content_hash = input_hashes.get(input_ref)
|
||||
if not content_hash:
|
||||
cid = input_hashes.get(input_ref)
|
||||
if not cid:
|
||||
continue
|
||||
|
||||
path = cache_mgr.get_by_content_hash(content_hash)
|
||||
path = cache_mgr.get_by_cid(cid)
|
||||
if path:
|
||||
try:
|
||||
features = [feature] if feature else ["beats", "energy"]
|
||||
result = analyzer.analyze(
|
||||
input_hash=content_hash,
|
||||
input_hash=cid,
|
||||
features=features,
|
||||
input_path=Path(path),
|
||||
)
|
||||
analysis_results[node_id] = result
|
||||
analysis_results[content_hash] = result
|
||||
analysis_results[cid] = result
|
||||
except Exception as e:
|
||||
logger.warning(f"Analysis failed for {node_id}: {e}")
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ def register_input_cid(
|
||||
input_path: Local path to the input file
|
||||
|
||||
Returns:
|
||||
Dict with 'cid' and 'content_hash'
|
||||
Dict with 'cid' and 'cid'
|
||||
"""
|
||||
import hashlib
|
||||
|
||||
@@ -77,7 +77,7 @@ def register_input_cid(
|
||||
|
||||
# Compute content hash
|
||||
with open(path, "rb") as f:
|
||||
content_hash = hashlib.sha3_256(f.read()).hexdigest()
|
||||
cid = hashlib.sha3_256(f.read()).hexdigest()
|
||||
|
||||
# Add to IPFS
|
||||
cid = ipfs_client.add_file(path)
|
||||
@@ -89,7 +89,7 @@ def register_input_cid(
|
||||
return {
|
||||
"status": "completed",
|
||||
"cid": cid,
|
||||
"content_hash": content_hash,
|
||||
"cid": cid,
|
||||
"path": str(path),
|
||||
}
|
||||
|
||||
@@ -426,7 +426,7 @@ def run_from_local(
|
||||
return {"status": "failed", "phase": "register_input", "input": name, "error": result.get("error")}
|
||||
|
||||
input_cids[name] = result["cid"]
|
||||
input_hashes[name] = result["content_hash"]
|
||||
input_hashes[name] = result["cid"]
|
||||
|
||||
# Run the pipeline
|
||||
return run_recipe_cid.apply_async(
|
||||
|
||||
Reference in New Issue
Block a user