Rename content_hash/output_hash to cid/output_cid throughout

Refactor to use IPFS CID as the primary content identifier:
- Update database schema: content_hash -> cid, output_hash -> output_cid
- Update all services, routers, and tasks to use cid terminology
- Update HTML templates to display CID instead of hash
- Update cache_manager method and parameter names (e.g. get_by_content_hash -> get_by_cid)
- Update README documentation

This completes the transition to CID-only content addressing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-12 08:02:44 +00:00
parent 494a2a8650
commit 92d26b2b72
22 changed files with 981 additions and 988 deletions

View File

@@ -91,26 +91,26 @@ class CacheService:
self.cache = cache_manager
self.cache_dir = Path(os.environ.get("CACHE_DIR", "/tmp/artdag-cache"))
async def get_cache_item(self, content_hash: str) -> Optional[Dict[str, Any]]:
async def get_cache_item(self, cid: str) -> Optional[Dict[str, Any]]:
"""Get cached item with full metadata for display."""
# Check if content exists
if not self.cache.has_content(content_hash):
if not self.cache.has_content(cid):
return None
path = self.cache.get_by_content_hash(content_hash)
path = self.cache.get_by_cid(cid)
if not path or not path.exists():
return None
# Get metadata from database
meta = await self.db.load_item_metadata(content_hash, None)
cache_item = await self.db.get_cache_item(content_hash)
meta = await self.db.load_item_metadata(cid, None)
cache_item = await self.db.get_cache_item(cid)
media_type = detect_media_type(path)
mime_type = get_mime_type(path)
size = path.stat().st_size
return {
"content_hash": content_hash,
"cid": cid,
"path": str(path),
"media_type": media_type,
"mime_type": mime_type,
@@ -119,10 +119,10 @@ class CacheService:
"meta": meta,
}
async def check_access(self, content_hash: str, actor_id: str, username: str) -> bool:
async def check_access(self, cid: str, actor_id: str, username: str) -> bool:
"""Check if user has access to content."""
user_hashes = await self._get_user_cache_hashes(username, actor_id)
return content_hash in user_hashes
return cid in user_hashes
async def _get_user_cache_hashes(self, username: str, actor_id: Optional[str] = None) -> set:
"""Get all cache hashes owned by or associated with a user."""
@@ -137,7 +137,7 @@ class CacheService:
try:
db_items = await self.db.get_user_items(actor_id)
for item in db_items:
hashes.add(item["content_hash"])
hashes.add(item["cid"])
except Exception:
pass
@@ -160,8 +160,8 @@ class CacheService:
if isinstance(inputs, dict):
inputs = list(inputs.values())
hashes.update(inputs)
if run.get("output_hash"):
hashes.add(run["output_hash"])
if run.get("output_cid"):
hashes.add(run["output_cid"])
return hashes
@@ -188,12 +188,12 @@ class CacheService:
return runs
async def get_raw_file(self, content_hash: str) -> Tuple[Optional[Path], Optional[str], Optional[str]]:
async def get_raw_file(self, cid: str) -> Tuple[Optional[Path], Optional[str], Optional[str]]:
"""Get raw file path, media type, and filename for download."""
if not self.cache.has_content(content_hash):
if not self.cache.has_content(cid):
return None, None, None
path = self.cache.get_by_content_hash(content_hash)
path = self.cache.get_by_cid(cid)
if not path or not path.exists():
return None, None, None
@@ -223,17 +223,17 @@ class CacheService:
except Exception:
ext = "jpg"
filename = f"{content_hash}.{ext}"
filename = f"{cid}.{ext}"
return path, mime, filename
async def get_as_mp4(self, content_hash: str) -> Tuple[Optional[Path], Optional[str]]:
async def get_as_mp4(self, cid: str) -> Tuple[Optional[Path], Optional[str]]:
"""Get content as MP4, transcoding if necessary. Returns (path, error)."""
if not self.cache.has_content(content_hash):
return None, f"Content {content_hash} not in cache"
if not self.cache.has_content(cid):
return None, f"Content {cid} not in cache"
path = self.cache.get_by_content_hash(content_hash)
path = self.cache.get_by_cid(cid)
if not path or not path.exists():
return None, f"Content {content_hash} not in cache"
return None, f"Content {cid} not in cache"
# Check if video
media_type = detect_media_type(path)
@@ -241,7 +241,7 @@ class CacheService:
return None, "Content is not a video"
# Check for cached MP4
mp4_path = self.cache_dir / f"{content_hash}.mp4"
mp4_path = self.cache_dir / f"{cid}.mp4"
if mp4_path.exists():
return mp4_path, None
@@ -258,7 +258,7 @@ class CacheService:
pass
# Transcode to MP4
transcode_path = self.cache_dir / f"{content_hash}.transcoding.mp4"
transcode_path = self.cache_dir / f"{cid}.transcoding.mp4"
try:
result = subprocess.run(
["ffmpeg", "-y", "-i", str(path),
@@ -283,15 +283,15 @@ class CacheService:
transcode_path.unlink()
return None, f"Transcoding failed: {e}"
async def get_metadata(self, content_hash: str, actor_id: str) -> Optional[Dict[str, Any]]:
async def get_metadata(self, cid: str, actor_id: str) -> Optional[Dict[str, Any]]:
"""Get content metadata."""
if not self.cache.has_content(content_hash):
if not self.cache.has_content(cid):
return None
return await self.db.load_item_metadata(content_hash, actor_id)
return await self.db.load_item_metadata(cid, actor_id)
async def update_metadata(
self,
content_hash: str,
cid: str,
actor_id: str,
title: str = None,
description: str = None,
@@ -299,7 +299,7 @@ class CacheService:
custom: Dict[str, Any] = None,
) -> Tuple[bool, Optional[str]]:
"""Update content metadata. Returns (success, error)."""
if not self.cache.has_content(content_hash):
if not self.cache.has_content(cid):
return False, "Content not found"
# Build update dict
@@ -314,28 +314,28 @@ class CacheService:
updates["custom"] = custom
try:
await self.db.update_item_metadata(content_hash, actor_id, **updates)
await self.db.update_item_metadata(cid, actor_id, **updates)
return True, None
except Exception as e:
return False, str(e)
async def publish_to_l2(
self,
content_hash: str,
cid: str,
actor_id: str,
l2_server: str,
auth_token: str,
) -> Tuple[Optional[str], Optional[str]]:
"""Publish content to L2 and IPFS. Returns (ipfs_cid, error)."""
if not self.cache.has_content(content_hash):
if not self.cache.has_content(cid):
return None, "Content not found"
# Get IPFS CID
cache_item = await self.db.get_cache_item(content_hash)
cache_item = await self.db.get_cache_item(cid)
ipfs_cid = cache_item.get("ipfs_cid") if cache_item else None
# Get metadata for origin info
meta = await self.db.load_item_metadata(content_hash, actor_id)
meta = await self.db.load_item_metadata(cid, actor_id)
origin = meta.get("origin") if meta else None
if not origin or "type" not in origin:
@@ -351,10 +351,10 @@ class CacheService:
f"{l2_server}/assets/publish-cache",
headers={"Authorization": f"Bearer {auth_token}"},
json={
"content_hash": content_hash,
"cid": cid,
"ipfs_cid": ipfs_cid,
"asset_name": meta.get("title") or content_hash[:16],
"asset_type": detect_media_type(self.cache.get_by_content_hash(content_hash)),
"asset_name": meta.get("title") or cid[:16],
"asset_type": detect_media_type(self.cache.get_by_cid(cid)),
"origin": origin,
"description": meta.get("description"),
"tags": meta.get("tags", []),
@@ -374,14 +374,14 @@ class CacheService:
# Update local metadata with publish status
await self.db.save_l2_share(
content_hash=content_hash,
cid=cid,
actor_id=actor_id,
l2_server=l2_server,
asset_name=meta.get("title") or content_hash[:16],
content_type=detect_media_type(self.cache.get_by_content_hash(content_hash))
asset_name=meta.get("title") or cid[:16],
content_type=detect_media_type(self.cache.get_by_cid(cid))
)
await self.db.update_item_metadata(
content_hash=content_hash,
cid=cid,
actor_id=actor_id,
pinned=True,
pin_reason="published"
@@ -389,37 +389,37 @@ class CacheService:
return l2_result.get("ipfs_cid") or ipfs_cid, None
async def delete_content(self, content_hash: str, actor_id: str) -> Tuple[bool, Optional[str]]:
async def delete_content(self, cid: str, actor_id: str) -> Tuple[bool, Optional[str]]:
"""Delete content from cache. Returns (success, error)."""
if not self.cache.has_content(content_hash):
if not self.cache.has_content(cid):
return False, "Content not found"
# Check if pinned
meta = await self.db.load_item_metadata(content_hash, actor_id)
meta = await self.db.load_item_metadata(cid, actor_id)
if meta and meta.get("pinned"):
pin_reason = meta.get("pin_reason", "unknown")
return False, f"Cannot discard pinned item (reason: {pin_reason})"
# Check deletion rules via cache_manager
can_delete, reason = self.cache.can_delete(content_hash)
can_delete, reason = self.cache.can_delete(cid)
if not can_delete:
return False, f"Cannot discard: {reason}"
# Delete via cache_manager
success, msg = self.cache.delete_by_content_hash(content_hash)
success, msg = self.cache.delete_by_cid(cid)
# Clean up legacy metadata files
meta_path = self.cache_dir / f"{content_hash}.meta.json"
meta_path = self.cache_dir / f"{cid}.meta.json"
if meta_path.exists():
meta_path.unlink()
mp4_path = self.cache_dir / f"{content_hash}.mp4"
mp4_path = self.cache_dir / f"{cid}.mp4"
if mp4_path.exists():
mp4_path.unlink()
return True, None
async def import_from_ipfs(self, ipfs_cid: str, actor_id: str) -> Tuple[Optional[str], Optional[str]]:
"""Import content from IPFS. Returns (content_hash, error)."""
"""Import content from IPFS. Returns (cid, error)."""
try:
import ipfs_client
@@ -433,18 +433,18 @@ class CacheService:
# Store in cache
cached, _ = self.cache.put(tmp_path, node_type="import", move=True)
content_hash = cached.content_hash
cid = cached.cid
# Save to database
await self.db.create_cache_item(content_hash, ipfs_cid)
await self.db.create_cache_item(cid, ipfs_cid)
await self.db.save_item_metadata(
content_hash=content_hash,
cid=cid,
actor_id=actor_id,
item_type="media",
filename=f"ipfs-{ipfs_cid[:16]}"
)
return content_hash, None
return cid, None
except Exception as e:
return None, f"Import failed: {e}"
@@ -454,7 +454,7 @@ class CacheService:
filename: str,
actor_id: str,
) -> Tuple[Optional[str], Optional[str], Optional[str]]:
"""Upload content to cache. Returns (content_hash, ipfs_cid, error)."""
"""Upload content to cache. Returns (cid, ipfs_cid, error)."""
import tempfile
try:
@@ -468,18 +468,18 @@ class CacheService:
# Store in cache (also stores in IPFS)
cached, ipfs_cid = self.cache.put(tmp_path, node_type="upload", move=True)
content_hash = cached.content_hash
cid = cached.cid
# Save to database with detected MIME type
await self.db.create_cache_item(content_hash, ipfs_cid)
await self.db.create_cache_item(cid, ipfs_cid)
await self.db.save_item_metadata(
content_hash=content_hash,
cid=cid,
actor_id=actor_id,
item_type=mime_type, # Store actual MIME type
filename=filename
)
return content_hash, ipfs_cid, None
return cid, ipfs_cid, None
except Exception as e:
return None, None, f"Upload failed: {e}"
@@ -502,10 +502,10 @@ class CacheService:
return items
# Legacy compatibility methods
def has_content(self, content_hash: str) -> bool:
def has_content(self, cid: str) -> bool:
"""Check if content exists in cache."""
return self.cache.has_content(content_hash)
return self.cache.has_content(cid)
def get_ipfs_cid(self, content_hash: str) -> Optional[str]:
def get_ipfs_cid(self, cid: str) -> Optional[str]:
"""Get IPFS CID for cached content."""
return self.cache.get_ipfs_cid(content_hash)
return self.cache.get_ipfs_cid(cid)

View File

@@ -27,7 +27,7 @@ class RecipeService:
async def get_recipe(self, recipe_id: str) -> Optional[Dict[str, Any]]:
"""Get a recipe by ID (content hash)."""
# Get from cache (content-addressed storage)
path = self.cache.get_by_content_hash(recipe_id)
path = self.cache.get_by_cid(recipe_id)
if not path or not path.exists():
return None
@@ -70,8 +70,8 @@ class RecipeService:
if hasattr(self.cache, 'list_by_type'):
items = self.cache.list_by_type('recipe')
logger.info(f"Found {len(items)} recipes in cache")
for content_hash in items:
recipe = await self.get_recipe(content_hash)
for cid in items:
recipe = await self.get_recipe(cid)
if recipe and not recipe.get("error"):
owner = recipe.get("owner")
# Filter by actor - L1 is per-user
@@ -114,7 +114,7 @@ class RecipeService:
# Store in cache (content-addressed, auto-pins to IPFS)
cached, ipfs_cid = self.cache.put(tmp_path, node_type="recipe", move=True)
recipe_id = cached.content_hash
recipe_id = cached.cid
return recipe_id, None
@@ -140,12 +140,12 @@ class RecipeService:
# Delete from cache
try:
if hasattr(self.cache, 'delete_by_content_hash'):
success, msg = self.cache.delete_by_content_hash(recipe_id)
if hasattr(self.cache, 'delete_by_cid'):
success, msg = self.cache.delete_by_cid(recipe_id)
if not success:
return False, msg
else:
path = self.cache.get_by_content_hash(recipe_id)
path = self.cache.get_by_cid(recipe_id)
if path and path.exists():
path.unlink()

View File

@@ -122,7 +122,7 @@ class RunService:
"status": "completed",
"recipe": cached.get("recipe"),
"inputs": self._ensure_inputs_list(cached.get("inputs")),
"output_hash": cached.get("output_hash"),
"output_cid": cached.get("output_cid"),
"ipfs_cid": cached.get("ipfs_cid"),
"provenance_cid": cached.get("provenance_cid"),
"actor_id": cached.get("actor_id"),
@@ -171,7 +171,7 @@ class RunService:
run_data["status"] = "completed"
task_result = result.result
if isinstance(task_result, dict):
run_data["output_hash"] = task_result.get("output_hash")
run_data["output_cid"] = task_result.get("output_cid")
else:
run_data["status"] = "failed"
run_data["error"] = str(result.result)
@@ -258,7 +258,7 @@ class RunService:
run_data["status"] = "completed"
task_result = result.result
if isinstance(task_result, dict):
run_data["output_hash"] = task_result.get("output_hash")
run_data["output_cid"] = task_result.get("output_cid")
else:
run_data["status"] = "failed"
run_data["error"] = str(result.result)
@@ -332,15 +332,15 @@ class RunService:
# Check database cache first (completed runs)
cached_run = await self.db.get_run_cache(run_id)
if cached_run:
output_hash = cached_run.get("output_hash")
if output_hash and self.cache.has_content(output_hash):
output_cid = cached_run.get("output_cid")
if output_cid and self.cache.has_content(output_cid):
return {
"run_id": run_id,
"status": "completed",
"recipe": recipe,
"inputs": input_list,
"output_name": output_name,
"output_hash": output_hash,
"output_cid": output_cid,
"ipfs_cid": cached_run.get("ipfs_cid"),
"provenance_cid": cached_run.get("provenance_cid"),
"created_at": cached_run.get("created_at"),
@@ -355,20 +355,20 @@ class RunService:
l2_resp = await client.get(f"{l2_server}/assets/by-run-id/{run_id}")
if l2_resp.status_code == 200:
l2_data = l2_resp.json()
output_hash = l2_data.get("output_hash")
output_cid = l2_data.get("output_cid")
ipfs_cid = l2_data.get("ipfs_cid")
if output_hash and ipfs_cid:
if output_cid and ipfs_cid:
# Pull from IPFS to local cache
try:
import ipfs_client
legacy_dir = self.cache_dir / "legacy"
legacy_dir.mkdir(parents=True, exist_ok=True)
recovery_path = legacy_dir / output_hash
recovery_path = legacy_dir / output_cid
if ipfs_client.get_file(ipfs_cid, str(recovery_path)):
# Save to database cache
await self.db.save_run_cache(
run_id=run_id,
output_hash=output_hash,
output_cid=output_cid,
recipe=recipe,
inputs=input_list,
ipfs_cid=ipfs_cid,
@@ -380,7 +380,7 @@ class RunService:
"status": "completed",
"recipe": recipe,
"inputs": input_list,
"output_hash": output_hash,
"output_cid": output_cid,
"ipfs_cid": ipfs_cid,
"provenance_cid": l2_data.get("provenance_cid"),
"created_at": datetime.now(timezone.utc).isoformat(),
@@ -493,7 +493,7 @@ class RunService:
plan_cache_id = run.get("plan_cache_id")
if plan_cache_id:
# Get plan from cache by content hash
plan_path = self.cache.get_by_content_hash(plan_cache_id)
plan_path = self.cache.get_by_cid(plan_cache_id)
if plan_path and plan_path.exists():
with open(plan_path) as f:
content = f.read()
@@ -535,12 +535,12 @@ class RunService:
artifacts = []
def get_artifact_info(content_hash: str, role: str, name: str) -> Optional[Dict]:
if self.cache.has_content(content_hash):
path = self.cache.get_by_content_hash(content_hash)
def get_artifact_info(cid: str, role: str, name: str) -> Optional[Dict]:
if self.cache.has_content(cid):
path = self.cache.get_by_cid(cid)
if path and path.exists():
return {
"hash": content_hash,
"hash": cid,
"size_bytes": path.stat().st_size,
"media_type": detect_media_type(path),
"role": role,
@@ -558,8 +558,8 @@ class RunService:
artifacts.append(info)
# Add output
if run.get("output_hash"):
info = get_artifact_info(run["output_hash"], "output", "Output")
if run.get("output_cid"):
info = get_artifact_info(run["output_cid"], "output", "Output")
if info:
artifacts.append(info)
@@ -669,10 +669,10 @@ class RunService:
if result.successful():
# Task completed - move to run_cache
task_result = result.result
if isinstance(task_result, dict) and task_result.get("output_hash"):
if isinstance(task_result, dict) and task_result.get("output_cid"):
await self.db.save_run_cache(
run_id=run_id,
output_hash=task_result["output_hash"],
output_cid=task_result["output_cid"],
recipe=run.get("recipe", "unknown"),
inputs=run.get("inputs", []),
ipfs_cid=task_result.get("ipfs_cid"),