Rename content_hash/output_hash to cid throughout

Refactor to use IPFS CID as the primary content identifier:
- Update database schema: content_hash -> cid, output_hash -> output_cid
- Update all services, routers, and tasks to use cid terminology
- Update HTML templates to display CID instead of hash
- Update cache_manager parameter names
- Update README documentation

This completes the transition to CID-only content addressing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-12 08:02:44 +00:00
parent 494a2a8650
commit 92d26b2b72
22 changed files with 981 additions and 988 deletions

View File

@@ -5,7 +5,7 @@ L1 rendering server for the Art DAG system. Manages distributed rendering jobs v
## Features
- **3-Phase Execution**: Analyze → Plan → Execute pipeline for recipe-based rendering
- **Content-Addressable Caching**: SHA3-256 hashed content with deduplication
- **Content-Addressable Caching**: IPFS CIDs with deduplication
- **IPFS Integration**: Optional IPFS-primary mode for distributed storage
- **Storage Providers**: S3, IPFS, and local storage backends
- **DAG Visualization**: Interactive graph visualization of execution plans
@@ -130,13 +130,13 @@ Interactive docs: http://localhost:8100/docs
| Method | Path | Description |
|--------|------|-------------|
| GET | `/cache/{hash}` | Get cached content (with preview) |
| GET | `/cache/{hash}/raw` | Download raw content |
| GET | `/cache/{hash}/mp4` | Get MP4 video |
| GET | `/cache/{hash}/meta` | Get content metadata |
| PATCH | `/cache/{hash}/meta` | Update metadata |
| POST | `/cache/{hash}/publish` | Publish to L2 |
| DELETE | `/cache/{hash}` | Delete from cache |
| GET | `/cache/{cid}` | Get cached content (with preview) |
| GET | `/cache/{cid}/raw` | Download raw content |
| GET | `/cache/{cid}/mp4` | Get MP4 video |
| GET | `/cache/{cid}/meta` | Get content metadata |
| PATCH | `/cache/{cid}/meta` | Update metadata |
| POST | `/cache/{cid}/publish` | Publish to L2 |
| DELETE | `/cache/{cid}` | Delete from cache |
| POST | `/cache/import?path=` | Import local file |
| POST | `/cache/upload` | Upload file |
| GET | `/media` | Browse media gallery |
@@ -185,7 +185,7 @@ Recipes are executed in three phases:
### Phase 1: Analyze
Extract features from input files:
- **Audio/Video**: Tempo, beat times, energy levels
- Results cached by content hash
- Results cached by CID
### Phase 2: Plan
Generate an execution plan:
@@ -237,7 +237,7 @@ output: sync_video
### Local Cache
- Location: `~/.artdag/cache/` (or `CACHE_DIR`)
- Content-addressed by SHA3-256 hash
- Content-addressed by IPFS CID
- Subdirectories: `plans/`, `analysis/`
### Redis
@@ -318,12 +318,12 @@ Every render produces a provenance record:
"task_id": "celery-task-uuid",
"rendered_at": "2026-01-07T...",
"rendered_by": "@giles@artdag.rose-ash.com",
"output": {"name": "...", "content_hash": "..."},
"output": {"name": "...", "cid": "Qm..."},
"inputs": [...],
"effects": [...],
"infrastructure": {
"software": {"name": "infra:artdag", "content_hash": "..."},
"hardware": {"name": "infra:giles-hp", "content_hash": "..."}
"software": {"name": "infra:artdag", "cid": "Qm..."},
"hardware": {"name": "infra:giles-hp", "cid": "Qm..."}
}
}
```

View File

@@ -155,13 +155,13 @@ async def run_recipe_endpoint(
# Check if already completed
cached = await database.get_run_cache(run_id)
if cached:
output_hash = cached.get("output_hash")
if cache.has_content(output_hash):
output_cid = cached.get("output_cid")
if cache.has_content(output_cid):
return {
"status": "completed",
"run_id": run_id,
"output_hash": output_hash,
"output_ipfs_cid": cache.get_ipfs_cid(output_hash),
"output_cid": output_cid,
"output_ipfs_cid": cache.get_ipfs_cid(output_cid),
"cached": True,
}
@@ -224,7 +224,7 @@ async def get_run_status(
if result.successful():
task_result = result.get()
data["status"] = task_result.get("status", "completed")
data["output_hash"] = task_result.get("output_cache_id")
data["output_cid"] = task_result.get("output_cache_id")
data["output_ipfs_cid"] = task_result.get("output_ipfs_cid")
data["total_steps"] = task_result.get("total_steps")
data["cached"] = task_result.get("cached")
@@ -250,7 +250,7 @@ async def get_run_status(
return {
"run_id": run_id,
"status": "completed",
"output_hash": cached.get("output_hash"),
"output_cid": cached.get("output_cid"),
"cached": True,
}

View File

@@ -40,9 +40,9 @@ def get_cache_service():
return CacheService(database, get_cache_manager())
@router.get("/{content_hash}")
@router.get("/{cid}")
async def get_cached(
content_hash: str,
cid: str,
request: Request,
cache_service: CacheService = Depends(get_cache_service),
):
@@ -50,16 +50,16 @@ async def get_cached(
auth_service = AuthService(get_redis_client())
ctx = auth_service.get_user_from_cookie(request)
cache_item = await cache_service.get_cache_item(content_hash)
cache_item = await cache_service.get_cache_item(cid)
if not cache_item:
if wants_html(request):
templates = get_templates(request)
return render(templates, "cache/not_found.html", request,
content_hash=content_hash,
cid=cid,
user=ctx,
active_tab="media",
)
raise HTTPException(404, f"Content {content_hash} not in cache")
raise HTTPException(404, f"Content {cid} not in cache")
# JSON response
if wants_json(request):
@@ -71,7 +71,7 @@ async def get_cached(
return RedirectResponse(url="/auth", status_code=302)
# Check access
has_access = await cache_service.check_access(content_hash, ctx.actor_id, ctx.username)
has_access = await cache_service.check_access(cid, ctx.actor_id, ctx.username)
if not has_access:
raise HTTPException(403, "Access denied")
@@ -83,27 +83,27 @@ async def get_cached(
)
@router.get("/{content_hash}/raw")
@router.get("/{cid}/raw")
async def get_cached_raw(
content_hash: str,
cid: str,
cache_service: CacheService = Depends(get_cache_service),
):
"""Get raw cached content (file download)."""
file_path, media_type, filename = await cache_service.get_raw_file(content_hash)
file_path, media_type, filename = await cache_service.get_raw_file(cid)
if not file_path:
raise HTTPException(404, f"Content {content_hash} not in cache")
raise HTTPException(404, f"Content {cid} not in cache")
return FileResponse(file_path, media_type=media_type, filename=filename)
@router.get("/{content_hash}/mp4")
@router.get("/{cid}/mp4")
async def get_cached_mp4(
content_hash: str,
cid: str,
cache_service: CacheService = Depends(get_cache_service),
):
"""Get cached content as MP4 (transcodes MKV on first request)."""
mp4_path, error = await cache_service.get_as_mp4(content_hash)
mp4_path, error = await cache_service.get_as_mp4(cid)
if error:
raise HTTPException(400 if "not a video" in error else 404, error)
@@ -111,29 +111,29 @@ async def get_cached_mp4(
return FileResponse(mp4_path, media_type="video/mp4")
@router.get("/{content_hash}/meta")
@router.get("/{cid}/meta")
async def get_metadata(
content_hash: str,
cid: str,
ctx: UserContext = Depends(require_auth),
cache_service: CacheService = Depends(get_cache_service),
):
"""Get content metadata."""
meta = await cache_service.get_metadata(content_hash, ctx.actor_id)
meta = await cache_service.get_metadata(cid, ctx.actor_id)
if meta is None:
raise HTTPException(404, "Content not found")
return meta
@router.patch("/{content_hash}/meta")
@router.patch("/{cid}/meta")
async def update_metadata(
content_hash: str,
cid: str,
req: UpdateMetadataRequest,
ctx: UserContext = Depends(require_auth),
cache_service: CacheService = Depends(get_cache_service),
):
"""Update content metadata."""
success, error = await cache_service.update_metadata(
content_hash=content_hash,
cid=cid,
actor_id=ctx.actor_id,
title=req.title,
description=req.description,
@@ -147,16 +147,16 @@ async def update_metadata(
return {"updated": True}
@router.post("/{content_hash}/publish")
@router.post("/{cid}/publish")
async def publish_content(
content_hash: str,
cid: str,
request: Request,
ctx: UserContext = Depends(require_auth),
cache_service: CacheService = Depends(get_cache_service),
):
"""Publish content to L2 and IPFS."""
ipfs_cid, error = await cache_service.publish_to_l2(
content_hash=content_hash,
cid=cid,
actor_id=ctx.actor_id,
l2_server=ctx.l2_server,
auth_token=request.cookies.get("auth_token"),
@@ -173,14 +173,14 @@ async def publish_content(
return {"ipfs_cid": ipfs_cid, "published": True}
@router.delete("/{content_hash}")
@router.delete("/{cid}")
async def delete_content(
content_hash: str,
cid: str,
ctx: UserContext = Depends(require_auth),
cache_service: CacheService = Depends(get_cache_service),
):
"""Delete content from cache."""
success, error = await cache_service.delete_content(content_hash, ctx.actor_id)
success, error = await cache_service.delete_content(cid, ctx.actor_id)
if error:
raise HTTPException(400 if "Cannot" in error or "pinned" in error else 404, error)
@@ -195,12 +195,12 @@ async def import_from_ipfs(
cache_service: CacheService = Depends(get_cache_service),
):
"""Import content from IPFS."""
content_hash, error = await cache_service.import_from_ipfs(ipfs_cid, ctx.actor_id)
cid, error = await cache_service.import_from_ipfs(ipfs_cid, ctx.actor_id)
if error:
raise HTTPException(400, error)
return {"content_hash": content_hash, "imported": True}
return {"cid": cid, "imported": True}
@router.post("/upload")
@@ -211,7 +211,7 @@ async def upload_content(
):
"""Upload content to cache and IPFS."""
content = await file.read()
content_hash, ipfs_cid, error = await cache_service.upload_content(
cid, ipfs_cid, error = await cache_service.upload_content(
content=content,
filename=file.filename,
actor_id=ctx.actor_id,
@@ -222,7 +222,7 @@ async def upload_content(
return {
"cid": ipfs_cid,
"content_hash": content_hash, # Legacy, for backwards compatibility
"cid": cid, # Legacy, for backwards compatibility
"filename": file.filename,
"size": len(content),
"uploaded": True,
@@ -272,9 +272,9 @@ async def list_media(
# HTMX metadata form
@router.get("/{content_hash}/meta-form", response_class=HTMLResponse)
@router.get("/{cid}/meta-form", response_class=HTMLResponse)
async def get_metadata_form(
content_hash: str,
cid: str,
request: Request,
cache_service: CacheService = Depends(get_cache_service),
):
@@ -285,11 +285,11 @@ async def get_metadata_form(
if not ctx:
return HTMLResponse('<div class="text-red-400">Login required</div>')
meta = await cache_service.get_metadata(content_hash, ctx.actor_id)
meta = await cache_service.get_metadata(cid, ctx.actor_id)
return HTMLResponse(f'''
<h2 class="text-lg font-semibold mb-4">Metadata</h2>
<form hx-patch="/cache/{content_hash}/meta"
<form hx-patch="/cache/{cid}/meta"
hx-target="#metadata-section"
hx-swap="innerHTML"
class="space-y-4">
@@ -312,9 +312,9 @@ async def get_metadata_form(
''')
@router.patch("/{content_hash}/meta", response_class=HTMLResponse)
@router.patch("/{cid}/meta", response_class=HTMLResponse)
async def update_metadata_htmx(
content_hash: str,
cid: str,
request: Request,
cache_service: CacheService = Depends(get_cache_service),
):
@@ -328,7 +328,7 @@ async def update_metadata_htmx(
form_data = await request.form()
success, error = await cache_service.update_metadata(
content_hash=content_hash,
cid=cid,
actor_id=ctx.actor_id,
title=form_data.get("title"),
description=form_data.get("description"),

View File

@@ -350,7 +350,7 @@ async def run_recipe(
if node.get("type") == "SOURCE" and "asset" in config:
asset_name = config["asset"]
if asset_name in assets:
config["content_hash"] = assets[asset_name].get("hash")
config["cid"] = assets[asset_name].get("hash")
# Resolve effect references for EFFECT nodes
if node.get("type") == "EFFECT" and "effect" in config:
@@ -392,21 +392,21 @@ async def run_recipe(
input_name_to_node[node["name"].replace("-", "_")] = node_id
# Map user-provided input names to content hashes (for variable inputs)
for input_name, content_hash in req.inputs.items():
for input_name, cid in req.inputs.items():
# Try direct node ID match first
if input_name in nodes:
node = nodes[input_name]
if node.get("node_type") == "SOURCE":
if "config" not in node:
node["config"] = {}
node["config"]["content_hash"] = content_hash
node["config"]["cid"] = cid
# Try input name lookup
elif input_name in input_name_to_node:
node_id = input_name_to_node[input_name]
node = nodes[node_id]
if "config" not in node:
node["config"] = {}
node["config"]["content_hash"] = content_hash
node["config"]["cid"] = cid
# Transform output to output_id
if "output" in dag_copy:
@@ -527,7 +527,7 @@ async def publish_recipe(
# Use cache service to publish (recipes are stored in cache)
cache_service = CacheService(database, get_cache_manager())
ipfs_cid, error = await cache_service.publish_to_l2(
content_hash=recipe_id,
cid=recipe_id,
actor_id=ctx.actor_id,
l2_server=ctx.l2_server,
auth_token=request.cookies.get("auth_token"),

View File

@@ -99,7 +99,7 @@ class RunStatus(BaseModel):
output_name: Optional[str] = None
created_at: Optional[str] = None
completed_at: Optional[str] = None
output_hash: Optional[str] = None
output_cid: Optional[str] = None
username: Optional[str] = None
provenance_cid: Optional[str] = None
celery_task_id: Optional[str] = None
@@ -244,13 +244,13 @@ async def get_run(
# Build artifacts list from output and inputs
artifacts = []
output_media_type = None
if run.get("output_hash"):
if run.get("output_cid"):
# Detect media type using magic bytes
output_hash = run["output_hash"]
output_cid = run["output_cid"]
media_type = None
try:
from ..services.run_service import detect_media_type
cache_path = get_cache_manager().get_by_content_hash(output_hash)
cache_path = get_cache_manager().get_by_cid(output_cid)
if cache_path and cache_path.exists():
simple_type = detect_media_type(cache_path)
media_type = type_to_mime(simple_type)
@@ -258,7 +258,7 @@ async def get_run(
except Exception:
pass
artifacts.append({
"hash": output_hash,
"hash": output_cid,
"step_name": "Output",
"media_type": media_type or "application/octet-stream",
})
@@ -271,7 +271,7 @@ async def get_run(
for i, input_hash in enumerate(run["inputs"]):
media_type = None
try:
cache_path = cache_manager.get_by_content_hash(input_hash)
cache_path = cache_manager.get_by_cid(input_hash)
if cache_path and cache_path.exists():
simple_type = detect_media_type(cache_path)
media_type = type_to_mime(simple_type)
@@ -393,9 +393,9 @@ async def list_runs(
for run in runs:
# Add output media info
if run.get("output_hash"):
if run.get("output_cid"):
try:
cache_path = cache_manager.get_by_content_hash(run["output_hash"])
cache_path = cache_manager.get_by_cid(run["output_cid"])
if cache_path and cache_path.exists():
simple_type = detect_media_type(cache_path)
run["output_media_type"] = type_to_mime(simple_type)
@@ -409,7 +409,7 @@ async def list_runs(
for input_hash in inputs[:3]:
preview = {"hash": input_hash, "media_type": None}
try:
cache_path = cache_manager.get_by_content_hash(input_hash)
cache_path = cache_manager.get_by_cid(input_hash)
if cache_path and cache_path.exists():
simple_type = detect_media_type(cache_path)
preview["media_type"] = type_to_mime(simple_type)
@@ -756,8 +756,8 @@ async def publish_run(
raise HTTPException(404, "Run not found")
# Check if run has output
output_hash = run.get("output_hash")
if not output_hash:
output_cid = run.get("output_cid")
if not output_cid:
error = "Run has no output to publish"
if wants_html(request):
return HTMLResponse(f'<span class="text-red-400">{error}</span>')
@@ -766,7 +766,7 @@ async def publish_run(
# Use cache service to publish the output
cache_service = CacheService(database, get_cache_manager())
ipfs_cid, error = await cache_service.publish_to_l2(
content_hash=output_hash,
cid=output_cid,
actor_id=ctx.actor_id,
l2_server=ctx.l2_server,
auth_token=request.cookies.get("auth_token"),
@@ -780,4 +780,4 @@ async def publish_run(
if wants_html(request):
return HTMLResponse(f'<span class="text-green-400">Shared: {ipfs_cid[:16]}...</span>')
return {"ipfs_cid": ipfs_cid, "output_hash": output_hash, "published": True}
return {"ipfs_cid": ipfs_cid, "output_cid": output_cid, "published": True}

View File

@@ -91,26 +91,26 @@ class CacheService:
self.cache = cache_manager
self.cache_dir = Path(os.environ.get("CACHE_DIR", "/tmp/artdag-cache"))
async def get_cache_item(self, content_hash: str) -> Optional[Dict[str, Any]]:
async def get_cache_item(self, cid: str) -> Optional[Dict[str, Any]]:
"""Get cached item with full metadata for display."""
# Check if content exists
if not self.cache.has_content(content_hash):
if not self.cache.has_content(cid):
return None
path = self.cache.get_by_content_hash(content_hash)
path = self.cache.get_by_cid(cid)
if not path or not path.exists():
return None
# Get metadata from database
meta = await self.db.load_item_metadata(content_hash, None)
cache_item = await self.db.get_cache_item(content_hash)
meta = await self.db.load_item_metadata(cid, None)
cache_item = await self.db.get_cache_item(cid)
media_type = detect_media_type(path)
mime_type = get_mime_type(path)
size = path.stat().st_size
return {
"content_hash": content_hash,
"cid": cid,
"path": str(path),
"media_type": media_type,
"mime_type": mime_type,
@@ -119,10 +119,10 @@ class CacheService:
"meta": meta,
}
async def check_access(self, content_hash: str, actor_id: str, username: str) -> bool:
async def check_access(self, cid: str, actor_id: str, username: str) -> bool:
"""Check if user has access to content."""
user_hashes = await self._get_user_cache_hashes(username, actor_id)
return content_hash in user_hashes
return cid in user_hashes
async def _get_user_cache_hashes(self, username: str, actor_id: Optional[str] = None) -> set:
"""Get all cache hashes owned by or associated with a user."""
@@ -137,7 +137,7 @@ class CacheService:
try:
db_items = await self.db.get_user_items(actor_id)
for item in db_items:
hashes.add(item["content_hash"])
hashes.add(item["cid"])
except Exception:
pass
@@ -160,8 +160,8 @@ class CacheService:
if isinstance(inputs, dict):
inputs = list(inputs.values())
hashes.update(inputs)
if run.get("output_hash"):
hashes.add(run["output_hash"])
if run.get("output_cid"):
hashes.add(run["output_cid"])
return hashes
@@ -188,12 +188,12 @@ class CacheService:
return runs
async def get_raw_file(self, content_hash: str) -> Tuple[Optional[Path], Optional[str], Optional[str]]:
async def get_raw_file(self, cid: str) -> Tuple[Optional[Path], Optional[str], Optional[str]]:
"""Get raw file path, media type, and filename for download."""
if not self.cache.has_content(content_hash):
if not self.cache.has_content(cid):
return None, None, None
path = self.cache.get_by_content_hash(content_hash)
path = self.cache.get_by_cid(cid)
if not path or not path.exists():
return None, None, None
@@ -223,17 +223,17 @@ class CacheService:
except Exception:
ext = "jpg"
filename = f"{content_hash}.{ext}"
filename = f"{cid}.{ext}"
return path, mime, filename
async def get_as_mp4(self, content_hash: str) -> Tuple[Optional[Path], Optional[str]]:
async def get_as_mp4(self, cid: str) -> Tuple[Optional[Path], Optional[str]]:
"""Get content as MP4, transcoding if necessary. Returns (path, error)."""
if not self.cache.has_content(content_hash):
return None, f"Content {content_hash} not in cache"
if not self.cache.has_content(cid):
return None, f"Content {cid} not in cache"
path = self.cache.get_by_content_hash(content_hash)
path = self.cache.get_by_cid(cid)
if not path or not path.exists():
return None, f"Content {content_hash} not in cache"
return None, f"Content {cid} not in cache"
# Check if video
media_type = detect_media_type(path)
@@ -241,7 +241,7 @@ class CacheService:
return None, "Content is not a video"
# Check for cached MP4
mp4_path = self.cache_dir / f"{content_hash}.mp4"
mp4_path = self.cache_dir / f"{cid}.mp4"
if mp4_path.exists():
return mp4_path, None
@@ -258,7 +258,7 @@ class CacheService:
pass
# Transcode to MP4
transcode_path = self.cache_dir / f"{content_hash}.transcoding.mp4"
transcode_path = self.cache_dir / f"{cid}.transcoding.mp4"
try:
result = subprocess.run(
["ffmpeg", "-y", "-i", str(path),
@@ -283,15 +283,15 @@ class CacheService:
transcode_path.unlink()
return None, f"Transcoding failed: {e}"
async def get_metadata(self, content_hash: str, actor_id: str) -> Optional[Dict[str, Any]]:
async def get_metadata(self, cid: str, actor_id: str) -> Optional[Dict[str, Any]]:
"""Get content metadata."""
if not self.cache.has_content(content_hash):
if not self.cache.has_content(cid):
return None
return await self.db.load_item_metadata(content_hash, actor_id)
return await self.db.load_item_metadata(cid, actor_id)
async def update_metadata(
self,
content_hash: str,
cid: str,
actor_id: str,
title: str = None,
description: str = None,
@@ -299,7 +299,7 @@ class CacheService:
custom: Dict[str, Any] = None,
) -> Tuple[bool, Optional[str]]:
"""Update content metadata. Returns (success, error)."""
if not self.cache.has_content(content_hash):
if not self.cache.has_content(cid):
return False, "Content not found"
# Build update dict
@@ -314,28 +314,28 @@ class CacheService:
updates["custom"] = custom
try:
await self.db.update_item_metadata(content_hash, actor_id, **updates)
await self.db.update_item_metadata(cid, actor_id, **updates)
return True, None
except Exception as e:
return False, str(e)
async def publish_to_l2(
self,
content_hash: str,
cid: str,
actor_id: str,
l2_server: str,
auth_token: str,
) -> Tuple[Optional[str], Optional[str]]:
"""Publish content to L2 and IPFS. Returns (ipfs_cid, error)."""
if not self.cache.has_content(content_hash):
if not self.cache.has_content(cid):
return None, "Content not found"
# Get IPFS CID
cache_item = await self.db.get_cache_item(content_hash)
cache_item = await self.db.get_cache_item(cid)
ipfs_cid = cache_item.get("ipfs_cid") if cache_item else None
# Get metadata for origin info
meta = await self.db.load_item_metadata(content_hash, actor_id)
meta = await self.db.load_item_metadata(cid, actor_id)
origin = meta.get("origin") if meta else None
if not origin or "type" not in origin:
@@ -351,10 +351,10 @@ class CacheService:
f"{l2_server}/assets/publish-cache",
headers={"Authorization": f"Bearer {auth_token}"},
json={
"content_hash": content_hash,
"cid": cid,
"ipfs_cid": ipfs_cid,
"asset_name": meta.get("title") or content_hash[:16],
"asset_type": detect_media_type(self.cache.get_by_content_hash(content_hash)),
"asset_name": meta.get("title") or cid[:16],
"asset_type": detect_media_type(self.cache.get_by_cid(cid)),
"origin": origin,
"description": meta.get("description"),
"tags": meta.get("tags", []),
@@ -374,14 +374,14 @@ class CacheService:
# Update local metadata with publish status
await self.db.save_l2_share(
content_hash=content_hash,
cid=cid,
actor_id=actor_id,
l2_server=l2_server,
asset_name=meta.get("title") or content_hash[:16],
content_type=detect_media_type(self.cache.get_by_content_hash(content_hash))
asset_name=meta.get("title") or cid[:16],
content_type=detect_media_type(self.cache.get_by_cid(cid))
)
await self.db.update_item_metadata(
content_hash=content_hash,
cid=cid,
actor_id=actor_id,
pinned=True,
pin_reason="published"
@@ -389,37 +389,37 @@ class CacheService:
return l2_result.get("ipfs_cid") or ipfs_cid, None
async def delete_content(self, content_hash: str, actor_id: str) -> Tuple[bool, Optional[str]]:
async def delete_content(self, cid: str, actor_id: str) -> Tuple[bool, Optional[str]]:
"""Delete content from cache. Returns (success, error)."""
if not self.cache.has_content(content_hash):
if not self.cache.has_content(cid):
return False, "Content not found"
# Check if pinned
meta = await self.db.load_item_metadata(content_hash, actor_id)
meta = await self.db.load_item_metadata(cid, actor_id)
if meta and meta.get("pinned"):
pin_reason = meta.get("pin_reason", "unknown")
return False, f"Cannot discard pinned item (reason: {pin_reason})"
# Check deletion rules via cache_manager
can_delete, reason = self.cache.can_delete(content_hash)
can_delete, reason = self.cache.can_delete(cid)
if not can_delete:
return False, f"Cannot discard: {reason}"
# Delete via cache_manager
success, msg = self.cache.delete_by_content_hash(content_hash)
success, msg = self.cache.delete_by_cid(cid)
# Clean up legacy metadata files
meta_path = self.cache_dir / f"{content_hash}.meta.json"
meta_path = self.cache_dir / f"{cid}.meta.json"
if meta_path.exists():
meta_path.unlink()
mp4_path = self.cache_dir / f"{content_hash}.mp4"
mp4_path = self.cache_dir / f"{cid}.mp4"
if mp4_path.exists():
mp4_path.unlink()
return True, None
async def import_from_ipfs(self, ipfs_cid: str, actor_id: str) -> Tuple[Optional[str], Optional[str]]:
"""Import content from IPFS. Returns (content_hash, error)."""
"""Import content from IPFS. Returns (cid, error)."""
try:
import ipfs_client
@@ -433,18 +433,18 @@ class CacheService:
# Store in cache
cached, _ = self.cache.put(tmp_path, node_type="import", move=True)
content_hash = cached.content_hash
cid = cached.cid
# Save to database
await self.db.create_cache_item(content_hash, ipfs_cid)
await self.db.create_cache_item(cid, ipfs_cid)
await self.db.save_item_metadata(
content_hash=content_hash,
cid=cid,
actor_id=actor_id,
item_type="media",
filename=f"ipfs-{ipfs_cid[:16]}"
)
return content_hash, None
return cid, None
except Exception as e:
return None, f"Import failed: {e}"
@@ -454,7 +454,7 @@ class CacheService:
filename: str,
actor_id: str,
) -> Tuple[Optional[str], Optional[str], Optional[str]]:
"""Upload content to cache. Returns (content_hash, ipfs_cid, error)."""
"""Upload content to cache. Returns (cid, ipfs_cid, error)."""
import tempfile
try:
@@ -468,18 +468,18 @@ class CacheService:
# Store in cache (also stores in IPFS)
cached, ipfs_cid = self.cache.put(tmp_path, node_type="upload", move=True)
content_hash = cached.content_hash
cid = cached.cid
# Save to database with detected MIME type
await self.db.create_cache_item(content_hash, ipfs_cid)
await self.db.create_cache_item(cid, ipfs_cid)
await self.db.save_item_metadata(
content_hash=content_hash,
cid=cid,
actor_id=actor_id,
item_type=mime_type, # Store actual MIME type
filename=filename
)
return content_hash, ipfs_cid, None
return cid, ipfs_cid, None
except Exception as e:
return None, None, f"Upload failed: {e}"
@@ -502,10 +502,10 @@ class CacheService:
return items
# Legacy compatibility methods
def has_content(self, content_hash: str) -> bool:
def has_content(self, cid: str) -> bool:
"""Check if content exists in cache."""
return self.cache.has_content(content_hash)
return self.cache.has_content(cid)
def get_ipfs_cid(self, content_hash: str) -> Optional[str]:
def get_ipfs_cid(self, cid: str) -> Optional[str]:
"""Get IPFS CID for cached content."""
return self.cache.get_ipfs_cid(content_hash)
return self.cache.get_ipfs_cid(cid)

View File

@@ -27,7 +27,7 @@ class RecipeService:
async def get_recipe(self, recipe_id: str) -> Optional[Dict[str, Any]]:
"""Get a recipe by ID (content hash)."""
# Get from cache (content-addressed storage)
path = self.cache.get_by_content_hash(recipe_id)
path = self.cache.get_by_cid(recipe_id)
if not path or not path.exists():
return None
@@ -70,8 +70,8 @@ class RecipeService:
if hasattr(self.cache, 'list_by_type'):
items = self.cache.list_by_type('recipe')
logger.info(f"Found {len(items)} recipes in cache")
for content_hash in items:
recipe = await self.get_recipe(content_hash)
for cid in items:
recipe = await self.get_recipe(cid)
if recipe and not recipe.get("error"):
owner = recipe.get("owner")
# Filter by actor - L1 is per-user
@@ -114,7 +114,7 @@ class RecipeService:
# Store in cache (content-addressed, auto-pins to IPFS)
cached, ipfs_cid = self.cache.put(tmp_path, node_type="recipe", move=True)
recipe_id = cached.content_hash
recipe_id = cached.cid
return recipe_id, None
@@ -140,12 +140,12 @@ class RecipeService:
# Delete from cache
try:
if hasattr(self.cache, 'delete_by_content_hash'):
success, msg = self.cache.delete_by_content_hash(recipe_id)
if hasattr(self.cache, 'delete_by_cid'):
success, msg = self.cache.delete_by_cid(recipe_id)
if not success:
return False, msg
else:
path = self.cache.get_by_content_hash(recipe_id)
path = self.cache.get_by_cid(recipe_id)
if path and path.exists():
path.unlink()

View File

@@ -122,7 +122,7 @@ class RunService:
"status": "completed",
"recipe": cached.get("recipe"),
"inputs": self._ensure_inputs_list(cached.get("inputs")),
"output_hash": cached.get("output_hash"),
"output_cid": cached.get("output_cid"),
"ipfs_cid": cached.get("ipfs_cid"),
"provenance_cid": cached.get("provenance_cid"),
"actor_id": cached.get("actor_id"),
@@ -171,7 +171,7 @@ class RunService:
run_data["status"] = "completed"
task_result = result.result
if isinstance(task_result, dict):
run_data["output_hash"] = task_result.get("output_hash")
run_data["output_cid"] = task_result.get("output_cid")
else:
run_data["status"] = "failed"
run_data["error"] = str(result.result)
@@ -258,7 +258,7 @@ class RunService:
run_data["status"] = "completed"
task_result = result.result
if isinstance(task_result, dict):
run_data["output_hash"] = task_result.get("output_hash")
run_data["output_cid"] = task_result.get("output_cid")
else:
run_data["status"] = "failed"
run_data["error"] = str(result.result)
@@ -332,15 +332,15 @@ class RunService:
# Check database cache first (completed runs)
cached_run = await self.db.get_run_cache(run_id)
if cached_run:
output_hash = cached_run.get("output_hash")
if output_hash and self.cache.has_content(output_hash):
output_cid = cached_run.get("output_cid")
if output_cid and self.cache.has_content(output_cid):
return {
"run_id": run_id,
"status": "completed",
"recipe": recipe,
"inputs": input_list,
"output_name": output_name,
"output_hash": output_hash,
"output_cid": output_cid,
"ipfs_cid": cached_run.get("ipfs_cid"),
"provenance_cid": cached_run.get("provenance_cid"),
"created_at": cached_run.get("created_at"),
@@ -355,20 +355,20 @@ class RunService:
l2_resp = await client.get(f"{l2_server}/assets/by-run-id/{run_id}")
if l2_resp.status_code == 200:
l2_data = l2_resp.json()
output_hash = l2_data.get("output_hash")
output_cid = l2_data.get("output_cid")
ipfs_cid = l2_data.get("ipfs_cid")
if output_hash and ipfs_cid:
if output_cid and ipfs_cid:
# Pull from IPFS to local cache
try:
import ipfs_client
legacy_dir = self.cache_dir / "legacy"
legacy_dir.mkdir(parents=True, exist_ok=True)
recovery_path = legacy_dir / output_hash
recovery_path = legacy_dir / output_cid
if ipfs_client.get_file(ipfs_cid, str(recovery_path)):
# Save to database cache
await self.db.save_run_cache(
run_id=run_id,
output_hash=output_hash,
output_cid=output_cid,
recipe=recipe,
inputs=input_list,
ipfs_cid=ipfs_cid,
@@ -380,7 +380,7 @@ class RunService:
"status": "completed",
"recipe": recipe,
"inputs": input_list,
"output_hash": output_hash,
"output_cid": output_cid,
"ipfs_cid": ipfs_cid,
"provenance_cid": l2_data.get("provenance_cid"),
"created_at": datetime.now(timezone.utc).isoformat(),
@@ -493,7 +493,7 @@ class RunService:
plan_cache_id = run.get("plan_cache_id")
if plan_cache_id:
# Get plan from cache by content hash
plan_path = self.cache.get_by_content_hash(plan_cache_id)
plan_path = self.cache.get_by_cid(plan_cache_id)
if plan_path and plan_path.exists():
with open(plan_path) as f:
content = f.read()
@@ -535,12 +535,12 @@ class RunService:
artifacts = []
def get_artifact_info(content_hash: str, role: str, name: str) -> Optional[Dict]:
if self.cache.has_content(content_hash):
path = self.cache.get_by_content_hash(content_hash)
def get_artifact_info(cid: str, role: str, name: str) -> Optional[Dict]:
if self.cache.has_content(cid):
path = self.cache.get_by_cid(cid)
if path and path.exists():
return {
"hash": content_hash,
"hash": cid,
"size_bytes": path.stat().st_size,
"media_type": detect_media_type(path),
"role": role,
@@ -558,8 +558,8 @@ class RunService:
artifacts.append(info)
# Add output
if run.get("output_hash"):
info = get_artifact_info(run["output_hash"], "output", "Output")
if run.get("output_cid"):
info = get_artifact_info(run["output_cid"], "output", "Output")
if info:
artifacts.append(info)
@@ -669,10 +669,10 @@ class RunService:
if result.successful():
# Task completed - move to run_cache
task_result = result.result
if isinstance(task_result, dict) and task_result.get("output_hash"):
if isinstance(task_result, dict) and task_result.get("output_cid"):
await self.db.save_run_cache(
run_id=run_id,
output_hash=task_result["output_hash"],
output_cid=task_result["output_cid"],
recipe=run.get("recipe", "unknown"),
inputs=run.get("inputs", []),
ipfs_cid=task_result.get("ipfs_cid"),

View File

@@ -1,29 +1,29 @@
{% extends "base.html" %}
{% block title %}{{ cache.content_hash[:16] }} - Cache - Art-DAG L1{% endblock %}
{% block title %}{{ cache.cid[:16] }} - Cache - Art-DAG L1{% endblock %}
{% block content %}
<div class="max-w-4xl mx-auto">
<!-- Header -->
<div class="flex items-center space-x-4 mb-6">
<a href="/media" class="text-gray-400 hover:text-white">&larr; Media</a>
<h1 class="text-xl font-bold font-mono">{{ cache.content_hash[:24] }}...</h1>
<h1 class="text-xl font-bold font-mono">{{ cache.cid[:24] }}...</h1>
</div>
<!-- Preview -->
<div class="bg-gray-800 rounded-lg border border-gray-700 mb-6 overflow-hidden">
{% if cache.mime_type and cache.mime_type.startswith('image/') %}
<img src="/cache/{{ cache.content_hash }}/raw" alt=""
<img src="/cache/{{ cache.cid }}/raw" alt=""
class="w-full max-h-96 object-contain bg-gray-900">
{% elif cache.mime_type and cache.mime_type.startswith('video/') %}
<video src="/cache/{{ cache.content_hash }}/raw" controls
<video src="/cache/{{ cache.cid }}/raw" controls
class="w-full max-h-96 bg-gray-900">
</video>
{% elif cache.mime_type and cache.mime_type.startswith('audio/') %}
<div class="p-8 bg-gray-900">
<audio src="/cache/{{ cache.content_hash }}/raw" controls class="w-full"></audio>
<audio src="/cache/{{ cache.cid }}/raw" controls class="w-full"></audio>
</div>
{% elif cache.mime_type == 'application/json' %}
@@ -42,8 +42,8 @@
<!-- Metadata -->
<div class="grid grid-cols-2 gap-4 mb-6">
<div class="bg-gray-800 rounded-lg p-4">
<div class="text-gray-500 text-sm">Hash</div>
<div class="font-mono text-sm text-white break-all">{{ cache.content_hash }}</div>
<div class="text-gray-500 text-sm">CID</div>
<div class="font-mono text-sm text-white break-all">{{ cache.cid }}</div>
</div>
<div class="bg-gray-800 rounded-lg p-4">
<div class="text-gray-500 text-sm">Content Type</div>
@@ -92,12 +92,12 @@
<!-- Actions -->
<div class="flex items-center space-x-4 mt-8">
<a href="/cache/{{ cache.content_hash }}/raw"
<a href="/cache/{{ cache.cid }}/raw"
download
class="bg-blue-600 hover:bg-blue-700 px-4 py-2 rounded font-medium">
Download
</a>
<button hx-post="/cache/{{ cache.content_hash }}/publish"
<button hx-post="/cache/{{ cache.cid }}/publish"
hx-target="#share-result"
class="bg-purple-600 hover:bg-purple-700 px-4 py-2 rounded font-medium">
Share to L2

View File

@@ -25,19 +25,19 @@
{% set is_video = item.type in ('video', 'video/mp4', 'video/webm', 'video/x-matroska') or (item.filename and item.filename.lower().endswith(('.mp4', '.mkv', '.webm', '.mov'))) %}
{% set is_audio = item.type in ('audio', 'audio/mpeg', 'audio/wav', 'audio/flac') or (item.filename and item.filename.lower().endswith(('.mp3', '.wav', '.flac', '.ogg'))) %}
<a href="/cache/{{ item.content_hash }}"
<a href="/cache/{{ item.cid }}"
class="media-item bg-gray-800 rounded-lg overflow-hidden hover:ring-2 hover:ring-blue-500 transition-all"
data-type="{% if is_image %}image{% elif is_video %}video{% elif is_audio %}audio{% else %}other{% endif %}">
{% if is_image %}
<img src="/cache/{{ item.content_hash }}/raw"
<img src="/cache/{{ item.cid }}/raw"
alt=""
loading="lazy"
class="w-full h-40 object-cover">
{% elif is_video %}
<div class="relative">
<video src="/cache/{{ item.content_hash }}/raw"
<video src="/cache/{{ item.cid }}/raw"
class="w-full h-40 object-cover"
muted
onmouseover="this.play()"
@@ -68,7 +68,7 @@
{% endif %}
<div class="p-3">
<div class="font-mono text-xs text-gray-500 truncate">{{ item.content_hash[:16] }}...</div>
<div class="font-mono text-xs text-gray-500 truncate">{{ item.cid[:16] }}...</div>
{% if item.filename %}
<div class="text-xs text-gray-600 truncate">{{ item.filename }}</div>
{% endif %}

View File

@@ -44,9 +44,9 @@
<span class="text-xs text-gray-500 mr-1">In:</span>
{% for inp in run.input_previews %}
{% if inp.media_type and inp.media_type.startswith('image/') %}
<img src="/cache/{{ inp.hash }}/raw" alt="" class="w-10 h-10 object-cover rounded">
<img src="/cache/{{ inp.cid }}/raw" alt="" class="w-10 h-10 object-cover rounded">
{% elif inp.media_type and inp.media_type.startswith('video/') %}
<video src="/cache/{{ inp.hash }}/raw" class="w-10 h-10 object-cover rounded" muted></video>
<video src="/cache/{{ inp.cid }}/raw" class="w-10 h-10 object-cover rounded" muted></video>
{% else %}
<div class="w-10 h-10 bg-gray-700 rounded flex items-center justify-center text-gray-500 text-xs">?</div>
{% endif %}
@@ -65,13 +65,13 @@
<span class="text-gray-600">-></span>
{# Output preview #}
{% if run.output_hash %}
{% if run.output_cid %}
<div class="flex items-center space-x-1">
<span class="text-xs text-gray-500 mr-1">Out:</span>
{% if run.output_media_type and run.output_media_type.startswith('image/') %}
<img src="/cache/{{ run.output_hash }}/raw" alt="" class="w-10 h-10 object-cover rounded">
<img src="/cache/{{ run.output_cid }}/raw" alt="" class="w-10 h-10 object-cover rounded">
{% elif run.output_media_type and run.output_media_type.startswith('video/') %}
<video src="/cache/{{ run.output_hash }}/raw" class="w-10 h-10 object-cover rounded" muted></video>
<video src="/cache/{{ run.output_cid }}/raw" class="w-10 h-10 object-cover rounded" muted></video>
{% else %}
<div class="w-10 h-10 bg-gray-700 rounded flex items-center justify-center text-gray-500 text-xs">?</div>
{% endif %}
@@ -82,8 +82,8 @@
<div class="flex-grow"></div>
{% if run.output_hash %}
<span class="font-mono text-xs text-gray-600">{{ run.output_hash[:12] }}...</span>
{% if run.output_cid %}
<span class="font-mono text-xs text-gray-600">{{ run.output_cid[:12] }}...</span>
{% endif %}
</div>
</a>

View File

@@ -211,20 +211,20 @@
{% if artifacts %}
<div class="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-4 gap-4">
{% for artifact in artifacts %}
<a href="/cache/{{ artifact.hash }}"
<a href="/cache/{{ artifact.cid }}"
class="bg-gray-800 rounded-lg p-4 hover:bg-gray-750 transition-colors">
{% if artifact.media_type and artifact.media_type.startswith('image/') %}
<img src="/cache/{{ artifact.hash }}/raw" alt=""
<img src="/cache/{{ artifact.cid }}/raw" alt=""
class="w-full h-32 object-cover rounded mb-2">
{% elif artifact.media_type and artifact.media_type.startswith('video/') %}
<video src="/cache/{{ artifact.hash }}/raw"
<video src="/cache/{{ artifact.cid }}/raw"
class="w-full h-32 object-cover rounded mb-2" muted></video>
{% else %}
<div class="w-full h-32 bg-gray-900 rounded mb-2 flex items-center justify-center text-gray-600">
{{ artifact.media_type or 'Unknown' }}
</div>
{% endif %}
<div class="font-mono text-xs text-gray-500 truncate">{{ artifact.hash[:16] }}...</div>
<div class="font-mono text-xs text-gray-500 truncate">{{ artifact.cid[:16] }}...</div>
<div class="text-sm text-gray-400">{{ artifact.step_name }}</div>
</a>
{% endfor %}
@@ -242,8 +242,8 @@
<div class="bg-gray-800 rounded-lg p-6">
<div class="flex items-center justify-between mb-4">
<h3 class="text-lg font-semibold">{{ item.input_name }}</h3>
<a href="/cache/{{ item.input_hash }}" class="font-mono text-xs text-blue-400 hover:text-blue-300">
{{ item.input_hash[:16] }}...
<a href="/cache/{{ item.input_cid }}" class="font-mono text-xs text-blue-400 hover:text-blue-300">
{{ item.input_cid[:16] }}...
</a>
</div>
@@ -330,21 +330,21 @@
<div class="bg-gray-800 rounded-lg overflow-hidden">
<!-- Media Preview -->
{% if input.media_type and input.media_type.startswith('image/') %}
<a href="/cache/{{ input.hash }}" class="block">
<img src="/cache/{{ input.hash }}/raw" alt="{{ input.name or 'Input' }}"
<a href="/cache/{{ input.cid }}" class="block">
<img src="/cache/{{ input.cid }}/raw" alt="{{ input.name or 'Input' }}"
class="w-full h-48 object-cover">
</a>
{% elif input.media_type and input.media_type.startswith('video/') %}
<a href="/cache/{{ input.hash }}" class="block">
<video src="/cache/{{ input.hash }}/raw"
<a href="/cache/{{ input.cid }}" class="block">
<video src="/cache/{{ input.cid }}/raw"
class="w-full h-48 object-cover" muted controls></video>
</a>
{% elif input.media_type and input.media_type.startswith('audio/') %}
<div class="p-4 bg-gray-900">
<audio src="/cache/{{ input.hash }}/raw" controls class="w-full"></audio>
<audio src="/cache/{{ input.cid }}/raw" controls class="w-full"></audio>
</div>
{% else %}
<a href="/cache/{{ input.hash }}" class="block">
<a href="/cache/{{ input.cid }}" class="block">
<div class="w-full h-48 bg-gray-900 flex items-center justify-center text-gray-600">
<div class="text-center">
<div class="text-4xl mb-2">📄</div>
@@ -358,8 +358,8 @@
{% if input.name %}
<div class="font-medium text-white mb-1">{{ input.name }}</div>
{% endif %}
<a href="/cache/{{ input.hash }}" class="font-mono text-xs text-blue-400 hover:text-blue-300 block truncate">
{{ input.hash }}
<a href="/cache/{{ input.cid }}" class="font-mono text-xs text-blue-400 hover:text-blue-300 block truncate">
{{ input.cid }}
</a>
{% if input.media_type %}
<div class="text-xs text-gray-500 mt-1">{{ input.media_type }}</div>
@@ -384,22 +384,22 @@
</div>
<!-- Output -->
{% if run.output_hash %}
{% if run.output_cid %}
<div class="mt-8 bg-gray-800 rounded-lg p-6">
<h3 class="text-lg font-semibold mb-4">Output</h3>
{# Inline media preview #}
<div class="mb-4">
{% if output_media_type and output_media_type.startswith('image/') %}
<a href="/cache/{{ run.output_hash }}" class="block">
<img src="/cache/{{ run.output_hash }}/raw" alt="Output"
<a href="/cache/{{ run.output_cid }}" class="block">
<img src="/cache/{{ run.output_cid }}/raw" alt="Output"
class="max-w-full max-h-96 rounded-lg mx-auto">
</a>
{% elif output_media_type and output_media_type.startswith('video/') %}
<video src="/cache/{{ run.output_hash }}/raw" controls
<video src="/cache/{{ run.output_cid }}/raw" controls
class="max-w-full max-h-96 rounded-lg mx-auto"></video>
{% elif output_media_type and output_media_type.startswith('audio/') %}
<audio src="/cache/{{ run.output_hash }}/raw" controls class="w-full"></audio>
<audio src="/cache/{{ run.output_cid }}/raw" controls class="w-full"></audio>
{% else %}
<div class="bg-gray-900 rounded-lg p-8 text-center text-gray-500">
<div class="text-4xl mb-2">?</div>
@@ -409,8 +409,8 @@
</div>
<div class="flex items-center justify-between">
<a href="/cache/{{ run.output_hash }}" class="font-mono text-sm text-blue-400 hover:text-blue-300">
{{ run.output_hash }}
<a href="/cache/{{ run.output_cid }}" class="font-mono text-sm text-blue-400 hover:text-blue-300">
{{ run.output_cid }}
</a>
{% if run.output_ipfs_cid %}
<a href="https://ipfs.io/ipfs/{{ run.output_ipfs_cid }}"

View File

@@ -3,7 +3,7 @@
Cache management for Art DAG L1 server.
Integrates artdag's Cache, ActivityStore, and ActivityManager to provide:
- Content-addressed caching with both node_id and content_hash
- Content-addressed caching with both node_id and cid
- Activity tracking for runs (input/output/intermediate relationships)
- Deletion rules enforcement (shared items protected)
- L2 ActivityPub integration for "shared" status checks
@@ -35,7 +35,7 @@ logger = logging.getLogger(__name__)
def file_hash(path: Path, algorithm: str = "sha3_256") -> str:
"""Compute SHA3-256 hash of a file."""
"""Compute local content hash (fallback when IPFS unavailable)."""
hasher = hashlib.new(algorithm)
actual_path = path.resolve() if path.is_symlink() else path
with open(actual_path, "rb") as f:
@@ -51,10 +51,10 @@ class CachedFile:
Provides a unified view combining:
- node_id: computation identity (for DAG caching)
- content_hash: file content identity (for external references)
- cid: file content identity (for external references)
"""
node_id: str
content_hash: str
cid: str
path: Path
size_bytes: int
node_type: str
@@ -64,7 +64,7 @@ class CachedFile:
def from_cache_entry(cls, entry: CacheEntry) -> "CachedFile":
return cls(
node_id=entry.node_id,
content_hash=entry.content_hash,
cid=entry.cid,
path=entry.output_path,
size_bytes=entry.size_bytes,
node_type=entry.node_type,
@@ -84,41 +84,41 @@ class L2SharedChecker:
self.cache_ttl = cache_ttl
self._cache: Dict[str, tuple[bool, float]] = {}
def is_shared(self, content_hash: str) -> bool:
"""Check if content_hash has been published to L2."""
def is_shared(self, cid: str) -> bool:
"""Check if cid has been published to L2."""
import time
now = time.time()
# Check cache
if content_hash in self._cache:
is_shared, cached_at = self._cache[content_hash]
if cid in self._cache:
is_shared, cached_at = self._cache[cid]
if now - cached_at < self.cache_ttl:
logger.debug(f"L2 check (cached): {content_hash[:16]}... = {is_shared}")
logger.debug(f"L2 check (cached): {cid[:16]}... = {is_shared}")
return is_shared
# Query L2
try:
url = f"{self.l2_server}/assets/by-hash/{content_hash}"
url = f"{self.l2_server}/assets/by-hash/{cid}"
logger.info(f"L2 check: GET {url}")
resp = requests.get(url, timeout=5)
logger.info(f"L2 check response: {resp.status_code}")
is_shared = resp.status_code == 200
except Exception as e:
logger.warning(f"Failed to check L2 for {content_hash}: {e}")
logger.warning(f"Failed to check L2 for {cid}: {e}")
# On error, assume IS shared (safer - prevents accidental deletion)
is_shared = True
self._cache[content_hash] = (is_shared, now)
self._cache[cid] = (is_shared, now)
return is_shared
def invalidate(self, content_hash: str):
"""Invalidate cache for a content_hash (call after publishing)."""
self._cache.pop(content_hash, None)
def invalidate(self, cid: str):
"""Invalidate cache for a cid (call after publishing)."""
self._cache.pop(cid, None)
def mark_shared(self, content_hash: str):
def mark_shared(self, cid: str):
"""Mark as shared without querying (call after successful publish)."""
import time
self._cache[content_hash] = (True, time.time())
self._cache[cid] = (True, time.time())
class L1CacheManager:
@@ -131,7 +131,7 @@ class L1CacheManager:
- ActivityManager for deletion rules
- L2 integration for shared status
Provides both node_id and content_hash based access.
Provides both node_id and cid based access.
"""
def __init__(
@@ -162,16 +162,16 @@ class L1CacheManager:
is_shared_fn=self._is_shared_by_node_id,
)
# Content hash index: content_hash -> node_id
# Content hash index: cid -> node_id
# Uses Redis if available, falls back to in-memory dict
self._content_index: Dict[str, str] = {}
self._load_content_index()
# IPFS CID index: content_hash -> ipfs_cid
# IPFS CID index: cid -> ipfs_cid
self._ipfs_cids: Dict[str, str] = {}
self._load_ipfs_index()
# Legacy files directory (for files uploaded directly by content_hash)
# Legacy files directory (for files uploaded directly by cid)
self.legacy_dir = self.cache_dir / "legacy"
self.legacy_dir.mkdir(parents=True, exist_ok=True)
@@ -179,7 +179,7 @@ class L1CacheManager:
return self.cache_dir / "content_index.json"
def _load_content_index(self):
"""Load content_hash -> node_id index from Redis or JSON file."""
"""Load cid -> node_id index from Redis or JSON file."""
# If Redis available and has data, use it
if self._redis:
try:
@@ -206,8 +206,8 @@ class L1CacheManager:
# Also index from existing cache entries
for entry in self.cache.list_entries():
if entry.content_hash:
self._content_index[entry.content_hash] = entry.node_id
if entry.cid:
self._content_index[entry.cid] = entry.node_id
# Migrate to Redis if available
if self._redis and self._content_index:
@@ -218,39 +218,39 @@ class L1CacheManager:
logger.warning(f"Failed to migrate content index to Redis: {e}")
def _save_content_index(self):
"""Save content_hash -> node_id index to Redis and JSON file."""
"""Save cid -> node_id index to Redis and JSON file."""
# Always save to JSON as backup
with open(self._index_path(), "w") as f:
json.dump(self._content_index, f, indent=2)
def _set_content_index(self, content_hash: str, node_id: str):
def _set_content_index(self, cid: str, node_id: str):
"""Set a single content index entry (Redis + in-memory)."""
self._content_index[content_hash] = node_id
self._content_index[cid] = node_id
if self._redis:
try:
self._redis.hset(self._redis_content_key, content_hash, node_id)
self._redis.hset(self._redis_content_key, cid, node_id)
except Exception as e:
logger.warning(f"Failed to set content index in Redis: {e}")
self._save_content_index()
def _get_content_index(self, content_hash: str) -> Optional[str]:
def _get_content_index(self, cid: str) -> Optional[str]:
"""Get a content index entry (Redis-first, then in-memory)."""
if self._redis:
try:
val = self._redis.hget(self._redis_content_key, content_hash)
val = self._redis.hget(self._redis_content_key, cid)
if val:
return val.decode() if isinstance(val, bytes) else val
except Exception as e:
logger.warning(f"Failed to get content index from Redis: {e}")
return self._content_index.get(content_hash)
return self._content_index.get(cid)
def _del_content_index(self, content_hash: str):
def _del_content_index(self, cid: str):
"""Delete a content index entry."""
if content_hash in self._content_index:
del self._content_index[content_hash]
if cid in self._content_index:
del self._content_index[cid]
if self._redis:
try:
self._redis.hdel(self._redis_content_key, content_hash)
self._redis.hdel(self._redis_content_key, cid)
except Exception as e:
logger.warning(f"Failed to delete content index from Redis: {e}")
self._save_content_index()
@@ -259,7 +259,7 @@ class L1CacheManager:
return self.cache_dir / "ipfs_index.json"
def _load_ipfs_index(self):
"""Load content_hash -> ipfs_cid index from Redis or JSON file."""
"""Load cid -> ipfs_cid index from Redis or JSON file."""
# If Redis available and has data, use it
if self._redis:
try:
@@ -293,71 +293,71 @@ class L1CacheManager:
logger.warning(f"Failed to migrate IPFS index to Redis: {e}")
def _save_ipfs_index(self):
"""Save content_hash -> ipfs_cid index to JSON file (backup)."""
"""Save cid -> ipfs_cid index to JSON file (backup)."""
with open(self._ipfs_index_path(), "w") as f:
json.dump(self._ipfs_cids, f, indent=2)
def _set_ipfs_index(self, content_hash: str, ipfs_cid: str):
def _set_ipfs_index(self, cid: str, ipfs_cid: str):
"""Set a single IPFS index entry (Redis + in-memory)."""
self._ipfs_cids[content_hash] = ipfs_cid
self._ipfs_cids[cid] = ipfs_cid
if self._redis:
try:
self._redis.hset(self._redis_ipfs_key, content_hash, ipfs_cid)
self._redis.hset(self._redis_ipfs_key, cid, ipfs_cid)
except Exception as e:
logger.warning(f"Failed to set IPFS index in Redis: {e}")
self._save_ipfs_index()
def _get_ipfs_cid_from_index(self, content_hash: str) -> Optional[str]:
def _get_ipfs_cid_from_index(self, cid: str) -> Optional[str]:
"""Get IPFS CID from index (Redis-first, then in-memory)."""
if self._redis:
try:
val = self._redis.hget(self._redis_ipfs_key, content_hash)
val = self._redis.hget(self._redis_ipfs_key, cid)
if val:
return val.decode() if isinstance(val, bytes) else val
except Exception as e:
logger.warning(f"Failed to get IPFS CID from Redis: {e}")
return self._ipfs_cids.get(content_hash)
return self._ipfs_cids.get(cid)
def get_ipfs_cid(self, content_hash: str) -> Optional[str]:
def get_ipfs_cid(self, cid: str) -> Optional[str]:
"""Get IPFS CID for a content hash."""
return self._get_ipfs_cid_from_index(content_hash)
return self._get_ipfs_cid_from_index(cid)
def _is_shared_by_node_id(self, content_hash: str) -> bool:
"""Check if a content_hash is shared via L2."""
return self.l2_checker.is_shared(content_hash)
def _is_shared_by_node_id(self, cid: str) -> bool:
"""Check if a cid is shared via L2."""
return self.l2_checker.is_shared(cid)
def _load_meta(self, content_hash: str) -> dict:
def _load_meta(self, cid: str) -> dict:
"""Load metadata for a cached file."""
meta_path = self.cache_dir / f"{content_hash}.meta.json"
meta_path = self.cache_dir / f"{cid}.meta.json"
if meta_path.exists():
with open(meta_path) as f:
return json.load(f)
return {}
def is_pinned(self, content_hash: str) -> tuple[bool, str]:
def is_pinned(self, cid: str) -> tuple[bool, str]:
"""
Check if a content_hash is pinned (non-deletable).
Check if a cid is pinned (non-deletable).
Returns:
(is_pinned, reason) tuple
"""
meta = self._load_meta(content_hash)
meta = self._load_meta(cid)
if meta.get("pinned"):
return True, meta.get("pin_reason", "published")
return False, ""
def _save_meta(self, content_hash: str, **updates) -> dict:
def _save_meta(self, cid: str, **updates) -> dict:
"""Save/update metadata for a cached file."""
meta = self._load_meta(content_hash)
meta = self._load_meta(cid)
meta.update(updates)
meta_path = self.cache_dir / f"{content_hash}.meta.json"
meta_path = self.cache_dir / f"{cid}.meta.json"
with open(meta_path, "w") as f:
json.dump(meta, f, indent=2)
return meta
def pin(self, content_hash: str, reason: str = "published") -> None:
def pin(self, cid: str, reason: str = "published") -> None:
"""Mark an item as pinned (non-deletable)."""
self._save_meta(content_hash, pinned=True, pin_reason=reason)
self._save_meta(cid, pinned=True, pin_reason=reason)
# ============ File Storage ============
@@ -375,31 +375,28 @@ class L1CacheManager:
Args:
source_path: Path to file to cache
node_type: Type of node (e.g., "upload", "source", "effect")
node_id: Optional node_id; if not provided, uses content_hash
node_id: Optional node_id; if not provided, uses CID
execution_time: How long the operation took
move: If True, move instead of copy
Returns:
Tuple of (CachedFile with both node_id and content_hash, IPFS CID or None)
Tuple of (CachedFile with both node_id and cid, CID)
"""
# Compute content hash first
content_hash = file_hash(source_path)
# Upload to IPFS first to get the CID (primary identifier)
cid = ipfs_client.add_file(source_path)
if not cid:
# Fallback to local hash if IPFS unavailable
cid = file_hash(source_path)
logger.warning(f"IPFS unavailable, using local hash: {cid[:16]}...")
# Use content_hash as node_id if not provided
# This is for legacy/uploaded files that don't have a DAG node
# Use CID as node_id if not provided
if node_id is None:
node_id = content_hash
node_id = cid
# Check if already cached (by node_id)
existing = self.cache.get_entry(node_id)
if existing and existing.output_path.exists():
# Already cached - still try to get IPFS CID if we don't have it
ipfs_cid = self._get_ipfs_cid_from_index(content_hash)
if not ipfs_cid:
ipfs_cid = ipfs_client.add_file(existing.output_path)
if ipfs_cid:
self._set_ipfs_index(content_hash, ipfs_cid)
return CachedFile.from_cache_entry(existing), ipfs_cid
return CachedFile.from_cache_entry(existing), cid
# Store in local cache
self.cache.put(
@@ -412,16 +409,12 @@ class L1CacheManager:
entry = self.cache.get_entry(node_id)
# Update content index (Redis + local)
self._set_content_index(entry.content_hash, node_id)
# Update content index (CID -> node_id mapping)
self._set_content_index(cid, node_id)
# Upload to IPFS (async in background would be better, but sync for now)
ipfs_cid = ipfs_client.add_file(entry.output_path)
if ipfs_cid:
self._set_ipfs_index(entry.content_hash, ipfs_cid)
logger.info(f"Uploaded to IPFS: {entry.content_hash[:16]}... -> {ipfs_cid}")
logger.info(f"Cached: {cid[:16]}...")
return CachedFile.from_cache_entry(entry), ipfs_cid
return CachedFile.from_cache_entry(entry), cid
def get_by_node_id(self, node_id: str) -> Optional[Path]:
"""Get cached file path by node_id."""
@@ -432,46 +425,46 @@ class L1CacheManager:
# CIDv0 starts with "Qm", CIDv1 starts with "bafy" or other multibase prefixes
return identifier.startswith("Qm") or identifier.startswith("bafy") or identifier.startswith("baf")
def get_by_content_hash(self, content_hash: str) -> Optional[Path]:
"""Get cached file path by content_hash or IPFS CID. Falls back to IPFS if not in local cache."""
def get_by_cid(self, cid: str) -> Optional[Path]:
"""Get cached file path by cid or IPFS CID. Falls back to IPFS if not in local cache."""
# If it looks like an IPFS CID, use get_by_cid instead
if self._is_ipfs_cid(content_hash):
return self.get_by_cid(content_hash)
if self._is_ipfs_cid(cid):
return self.get_by_cid(cid)
# Check index first (Redis then local)
node_id = self._get_content_index(content_hash)
node_id = self._get_content_index(cid)
if node_id:
path = self.cache.get(node_id)
if path and path.exists():
logger.debug(f" Found via index: {path}")
return path
# For uploads, node_id == content_hash, so try direct lookup
# For uploads, node_id == cid, so try direct lookup
# This works even if cache index hasn't been reloaded
path = self.cache.get(content_hash)
logger.debug(f" cache.get({content_hash[:16]}...) returned: {path}")
path = self.cache.get(cid)
logger.debug(f" cache.get({cid[:16]}...) returned: {path}")
if path and path.exists():
self._set_content_index(content_hash, content_hash)
self._set_content_index(cid, cid)
return path
# Scan cache entries (fallback for new structure)
entry = self.cache.find_by_content_hash(content_hash)
entry = self.cache.find_by_cid(cid)
if entry and entry.output_path.exists():
logger.debug(f" Found via scan: {entry.output_path}")
self._set_content_index(content_hash, entry.node_id)
self._set_content_index(cid, entry.node_id)
return entry.output_path
# Check legacy location (files stored directly as CACHE_DIR/{content_hash})
legacy_path = self.cache_dir / content_hash
# Check legacy location (files stored directly as CACHE_DIR/{cid})
legacy_path = self.cache_dir / cid
if legacy_path.exists() and legacy_path.is_file():
return legacy_path
# Try to recover from IPFS if we have a CID
ipfs_cid = self._get_ipfs_cid_from_index(content_hash)
ipfs_cid = self._get_ipfs_cid_from_index(cid)
if ipfs_cid:
logger.info(f"Recovering from IPFS: {content_hash[:16]}... ({ipfs_cid})")
recovery_path = self.legacy_dir / content_hash
logger.info(f"Recovering from IPFS: {cid[:16]}... ({ipfs_cid})")
recovery_path = self.legacy_dir / cid
if ipfs_client.get_file(ipfs_cid, recovery_path):
logger.info(f"Recovered from IPFS: {recovery_path}")
return recovery_path
@@ -504,16 +497,16 @@ class L1CacheManager:
return None
def has_content(self, content_hash: str) -> bool:
def has_content(self, cid: str) -> bool:
"""Check if content exists in cache."""
return self.get_by_content_hash(content_hash) is not None
return self.get_by_cid(cid) is not None
def get_entry_by_content_hash(self, content_hash: str) -> Optional[CacheEntry]:
"""Get cache entry by content_hash."""
node_id = self._get_content_index(content_hash)
def get_entry_by_cid(self, cid: str) -> Optional[CacheEntry]:
"""Get cache entry by cid."""
node_id = self._get_content_index(cid)
if node_id:
return self.cache.get_entry(node_id)
return self.cache.find_by_content_hash(content_hash)
return self.cache.find_by_cid(cid)
def list_all(self) -> List[CachedFile]:
"""List all cached files."""
@@ -523,11 +516,11 @@ class L1CacheManager:
# New cache structure entries
for entry in self.cache.list_entries():
files.append(CachedFile.from_cache_entry(entry))
if entry.content_hash:
seen_hashes.add(entry.content_hash)
if entry.cid:
seen_hashes.add(entry.cid)
# Legacy files stored directly in cache_dir (old structure)
# These are files named by content_hash directly in CACHE_DIR
# These are files named by cid directly in CACHE_DIR
for f in self.cache_dir.iterdir():
# Skip directories and special files
if not f.is_file():
@@ -544,7 +537,7 @@ class L1CacheManager:
files.append(CachedFile(
node_id=f.name,
content_hash=f.name,
cid=f.name,
path=f,
size_bytes=f.stat().st_size,
node_type="legacy",
@@ -566,8 +559,8 @@ class L1CacheManager:
"""
hashes = []
for entry in self.cache.list_entries():
if entry.node_type == node_type and entry.content_hash:
hashes.append(entry.content_hash)
if entry.node_type == node_type and entry.cid:
hashes.append(entry.cid)
return hashes
# ============ Activity Tracking ============
@@ -590,19 +583,19 @@ class L1CacheManager:
def record_simple_activity(
self,
input_hashes: List[str],
output_hash: str,
output_cid: str,
run_id: str = None,
) -> Activity:
"""
Record a simple (non-DAG) execution as an activity.
For legacy single-effect runs that don't use full DAG execution.
Uses content_hash as node_id.
Uses cid as node_id.
"""
activity = Activity(
activity_id=run_id or str(hash((tuple(input_hashes), output_hash))),
activity_id=run_id or str(hash((tuple(input_hashes), output_cid))),
input_ids=sorted(input_hashes),
output_id=output_hash,
output_id=output_cid,
intermediate_ids=[],
created_at=datetime.now(timezone.utc).timestamp(),
status="completed",
@@ -624,7 +617,7 @@ class L1CacheManager:
# ============ Deletion Rules ============
def can_delete(self, content_hash: str) -> tuple[bool, str]:
def can_delete(self, cid: str) -> tuple[bool, str]:
"""
Check if a cached item can be deleted.
@@ -632,12 +625,12 @@ class L1CacheManager:
(can_delete, reason) tuple
"""
# Check if pinned (published or input to published)
pinned, reason = self.is_pinned(content_hash)
pinned, reason = self.is_pinned(cid)
if pinned:
return False, f"Item is pinned ({reason})"
# Find node_id for this content
node_id = self._get_content_index(content_hash) or content_hash
node_id = self._get_content_index(cid) or cid
# Check if it's an input or output of any activity
for activity in self.activity_store.list():
@@ -663,34 +656,34 @@ class L1CacheManager:
for node_id in activity.all_node_ids:
entry = self.cache.get_entry(node_id)
if entry:
pinned, reason = self.is_pinned(entry.content_hash)
pinned, reason = self.is_pinned(entry.cid)
if pinned:
return False, f"Item {node_id} is pinned ({reason})"
return True, "OK"
def delete_by_content_hash(self, content_hash: str) -> tuple[bool, str]:
def delete_by_cid(self, cid: str) -> tuple[bool, str]:
"""
Delete a cached item by content_hash.
Delete a cached item by cid.
Enforces deletion rules.
Returns:
(success, message) tuple
"""
can_delete, reason = self.can_delete(content_hash)
can_delete, reason = self.can_delete(cid)
if not can_delete:
return False, reason
# Find and delete
node_id = self._get_content_index(content_hash)
node_id = self._get_content_index(cid)
if node_id:
self.cache.remove(node_id)
self._del_content_index(content_hash)
self._del_content_index(cid)
return True, "Deleted"
# Try legacy
legacy_path = self.legacy_dir / content_hash
legacy_path = self.legacy_dir / cid
if legacy_path.exists():
legacy_path.unlink()
return True, "Deleted (legacy)"
@@ -732,7 +725,7 @@ class L1CacheManager:
if activity.output_id:
entry = self.cache.get_entry(activity.output_id)
if entry:
pinned, reason = self.is_pinned(entry.content_hash)
pinned, reason = self.is_pinned(entry.cid)
if pinned:
return False, f"Output is pinned ({reason})"
@@ -743,9 +736,9 @@ class L1CacheManager:
# Remove from cache
self.cache.remove(activity.output_id)
# Remove from content index (Redis + local)
self._del_content_index(entry.content_hash)
self._del_content_index(entry.cid)
# Delete from legacy dir if exists
legacy_path = self.legacy_dir / entry.content_hash
legacy_path = self.legacy_dir / entry.cid
if legacy_path.exists():
legacy_path.unlink()
@@ -754,8 +747,8 @@ class L1CacheManager:
entry = self.cache.get_entry(node_id)
if entry:
self.cache.remove(node_id)
self._del_content_index(entry.content_hash)
legacy_path = self.legacy_dir / entry.content_hash
self._del_content_index(entry.cid)
legacy_path = self.legacy_dir / entry.cid
if legacy_path.exists():
legacy_path.unlink()
@@ -777,13 +770,13 @@ class L1CacheManager:
# ============ L2 Integration ============
def mark_published(self, content_hash: str):
"""Mark a content_hash as published to L2."""
self.l2_checker.mark_shared(content_hash)
def mark_published(self, cid: str):
"""Mark a cid as published to L2."""
self.l2_checker.mark_shared(cid)
def invalidate_shared_cache(self, content_hash: str):
def invalidate_shared_cache(self, cid: str):
"""Invalidate shared status cache (call if item might be unpublished)."""
self.l2_checker.invalidate(content_hash)
self.l2_checker.invalidate(cid)
# ============ Stats ============

View File

@@ -19,7 +19,7 @@ SCHEMA_SQL = """
-- Core cache: just content hash and IPFS CID
-- Physical file storage - shared by all users
CREATE TABLE IF NOT EXISTS cache_items (
content_hash VARCHAR(64) PRIMARY KEY,
cid VARCHAR(64) PRIMARY KEY,
ipfs_cid VARCHAR(128),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
@@ -28,7 +28,7 @@ CREATE TABLE IF NOT EXISTS cache_items (
-- actor_id format: @username@server (ActivityPub style)
CREATE TABLE IF NOT EXISTS item_types (
id SERIAL PRIMARY KEY,
content_hash VARCHAR(64) REFERENCES cache_items(content_hash) ON DELETE CASCADE,
cid VARCHAR(64) REFERENCES cache_items(cid) ON DELETE CASCADE,
actor_id VARCHAR(255) NOT NULL,
type VARCHAR(50) NOT NULL,
path VARCHAR(255),
@@ -40,7 +40,7 @@ CREATE TABLE IF NOT EXISTS item_types (
filename VARCHAR(255),
metadata JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
UNIQUE(content_hash, actor_id, type, path)
UNIQUE(cid, actor_id, type, path)
);
-- Add columns if they don't exist (for existing databases)
@@ -61,7 +61,7 @@ CREATE TABLE IF NOT EXISTS pin_reasons (
-- L2 shares: per-user shares (includes content_type for role when shared)
CREATE TABLE IF NOT EXISTS l2_shares (
id SERIAL PRIMARY KEY,
content_hash VARCHAR(64) REFERENCES cache_items(content_hash) ON DELETE CASCADE,
cid VARCHAR(64) REFERENCES cache_items(cid) ON DELETE CASCADE,
actor_id VARCHAR(255) NOT NULL,
l2_server VARCHAR(255) NOT NULL,
asset_name VARCHAR(255) NOT NULL,
@@ -69,7 +69,7 @@ CREATE TABLE IF NOT EXISTS l2_shares (
content_type VARCHAR(50) NOT NULL,
published_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
last_synced_at TIMESTAMP WITH TIME ZONE,
UNIQUE(content_hash, actor_id, l2_server, content_type)
UNIQUE(cid, actor_id, l2_server, content_type)
);
-- Add activity_id column if it doesn't exist (for existing databases)
@@ -82,7 +82,7 @@ END $$;
-- run_id is a hash of (sorted inputs + recipe), making runs deterministic
CREATE TABLE IF NOT EXISTS run_cache (
run_id VARCHAR(64) PRIMARY KEY,
output_hash VARCHAR(64) NOT NULL,
output_cid VARCHAR(64) NOT NULL,
ipfs_cid VARCHAR(128),
provenance_cid VARCHAR(128),
recipe VARCHAR(255) NOT NULL,
@@ -128,27 +128,27 @@ CREATE TABLE IF NOT EXISTS storage_backends (
-- Storage pins tracking (what's pinned where)
CREATE TABLE IF NOT EXISTS storage_pins (
id SERIAL PRIMARY KEY,
content_hash VARCHAR(64) NOT NULL,
cid VARCHAR(64) NOT NULL,
storage_id INTEGER NOT NULL REFERENCES storage_backends(id) ON DELETE CASCADE,
ipfs_cid VARCHAR(128),
pin_type VARCHAR(20) NOT NULL, -- 'user_content', 'donated', 'system'
size_bytes BIGINT,
pinned_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
UNIQUE(content_hash, storage_id)
UNIQUE(cid, storage_id)
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_item_types_content_hash ON item_types(content_hash);
CREATE INDEX IF NOT EXISTS idx_item_types_cid ON item_types(cid);
CREATE INDEX IF NOT EXISTS idx_item_types_actor_id ON item_types(actor_id);
CREATE INDEX IF NOT EXISTS idx_item_types_type ON item_types(type);
CREATE INDEX IF NOT EXISTS idx_item_types_path ON item_types(path);
CREATE INDEX IF NOT EXISTS idx_pin_reasons_item_type ON pin_reasons(item_type_id);
CREATE INDEX IF NOT EXISTS idx_l2_shares_content_hash ON l2_shares(content_hash);
CREATE INDEX IF NOT EXISTS idx_l2_shares_cid ON l2_shares(cid);
CREATE INDEX IF NOT EXISTS idx_l2_shares_actor_id ON l2_shares(actor_id);
CREATE INDEX IF NOT EXISTS idx_run_cache_output ON run_cache(output_hash);
CREATE INDEX IF NOT EXISTS idx_run_cache_output ON run_cache(output_cid);
CREATE INDEX IF NOT EXISTS idx_storage_backends_actor ON storage_backends(actor_id);
CREATE INDEX IF NOT EXISTS idx_storage_backends_type ON storage_backends(provider_type);
CREATE INDEX IF NOT EXISTS idx_storage_pins_hash ON storage_pins(content_hash);
CREATE INDEX IF NOT EXISTS idx_storage_pins_hash ON storage_pins(cid);
CREATE INDEX IF NOT EXISTS idx_storage_pins_storage ON storage_pins(storage_id);
"""
@@ -171,47 +171,47 @@ async def close_db():
# ============ Cache Items ============
async def create_cache_item(content_hash: str, ipfs_cid: Optional[str] = None) -> dict:
async def create_cache_item(cid: str, ipfs_cid: Optional[str] = None) -> dict:
"""Create a cache item. Returns the created item."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO cache_items (content_hash, ipfs_cid)
INSERT INTO cache_items (cid, ipfs_cid)
VALUES ($1, $2)
ON CONFLICT (content_hash) DO UPDATE SET ipfs_cid = COALESCE($2, cache_items.ipfs_cid)
RETURNING content_hash, ipfs_cid, created_at
ON CONFLICT (cid) DO UPDATE SET ipfs_cid = COALESCE($2, cache_items.ipfs_cid)
RETURNING cid, ipfs_cid, created_at
""",
content_hash, ipfs_cid
cid, ipfs_cid
)
return dict(row)
async def get_cache_item(content_hash: str) -> Optional[dict]:
async def get_cache_item(cid: str) -> Optional[dict]:
"""Get a cache item by content hash."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"SELECT content_hash, ipfs_cid, created_at FROM cache_items WHERE content_hash = $1",
content_hash
"SELECT cid, ipfs_cid, created_at FROM cache_items WHERE cid = $1",
cid
)
return dict(row) if row else None
async def update_cache_item_ipfs_cid(content_hash: str, ipfs_cid: str) -> bool:
async def update_cache_item_ipfs_cid(cid: str, ipfs_cid: str) -> bool:
"""Update the IPFS CID for a cache item."""
async with pool.acquire() as conn:
result = await conn.execute(
"UPDATE cache_items SET ipfs_cid = $2 WHERE content_hash = $1",
content_hash, ipfs_cid
"UPDATE cache_items SET ipfs_cid = $2 WHERE cid = $1",
cid, ipfs_cid
)
return result == "UPDATE 1"
async def delete_cache_item(content_hash: str) -> bool:
async def delete_cache_item(cid: str) -> bool:
"""Delete a cache item and all associated data (cascades)."""
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM cache_items WHERE content_hash = $1",
content_hash
"DELETE FROM cache_items WHERE cid = $1",
cid
)
return result == "DELETE 1"
@@ -221,7 +221,7 @@ async def list_cache_items(limit: int = 100, offset: int = 0) -> List[dict]:
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT content_hash, ipfs_cid, created_at
SELECT cid, ipfs_cid, created_at
FROM cache_items
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
@@ -234,7 +234,7 @@ async def list_cache_items(limit: int = 100, offset: int = 0) -> List[dict]:
# ============ Item Types ============
async def add_item_type(
content_hash: str,
cid: str,
actor_id: str,
item_type: str,
path: Optional[str] = None,
@@ -247,72 +247,72 @@ async def add_item_type(
async with pool.acquire() as conn:
# Ensure cache_item exists
await conn.execute(
"INSERT INTO cache_items (content_hash) VALUES ($1) ON CONFLICT DO NOTHING",
content_hash
"INSERT INTO cache_items (cid) VALUES ($1) ON CONFLICT DO NOTHING",
cid
)
# Insert or update item_type
row = await conn.fetchrow(
"""
INSERT INTO item_types (content_hash, actor_id, type, path, description, source_type, source_url, source_note)
INSERT INTO item_types (cid, actor_id, type, path, description, source_type, source_url, source_note)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (content_hash, actor_id, type, path) DO UPDATE SET
ON CONFLICT (cid, actor_id, type, path) DO UPDATE SET
description = COALESCE($5, item_types.description),
source_type = COALESCE($6, item_types.source_type),
source_url = COALESCE($7, item_types.source_url),
source_note = COALESCE($8, item_types.source_note)
RETURNING id, content_hash, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
RETURNING id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
""",
content_hash, actor_id, item_type, path, description, source_type, source_url, source_note
cid, actor_id, item_type, path, description, source_type, source_url, source_note
)
return dict(row)
async def get_item_types(content_hash: str, actor_id: Optional[str] = None) -> List[dict]:
async def get_item_types(cid: str, actor_id: Optional[str] = None) -> List[dict]:
"""Get types for a cache item, optionally filtered by user."""
async with pool.acquire() as conn:
if actor_id:
rows = await conn.fetch(
"""
SELECT id, content_hash, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types
WHERE content_hash = $1 AND actor_id = $2
WHERE cid = $1 AND actor_id = $2
ORDER BY created_at
""",
content_hash, actor_id
cid, actor_id
)
else:
rows = await conn.fetch(
"""
SELECT id, content_hash, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types
WHERE content_hash = $1
WHERE cid = $1
ORDER BY created_at
""",
content_hash
cid
)
return [dict(row) for row in rows]
async def get_item_type(content_hash: str, actor_id: str, item_type: str, path: Optional[str] = None) -> Optional[dict]:
async def get_item_type(cid: str, actor_id: str, item_type: str, path: Optional[str] = None) -> Optional[dict]:
"""Get a specific type for a cache item and user."""
async with pool.acquire() as conn:
if path is None:
row = await conn.fetchrow(
"""
SELECT id, content_hash, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types
WHERE content_hash = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
""",
content_hash, actor_id, item_type
cid, actor_id, item_type
)
else:
row = await conn.fetchrow(
"""
SELECT id, content_hash, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types
WHERE content_hash = $1 AND actor_id = $2 AND type = $3 AND path = $4
WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path = $4
""",
content_hash, actor_id, item_type, path
cid, actor_id, item_type, path
)
return dict(row) if row else None
@@ -340,18 +340,18 @@ async def update_item_type(
return result == "UPDATE 1"
async def delete_item_type(content_hash: str, actor_id: str, item_type: str, path: Optional[str] = None) -> bool:
async def delete_item_type(cid: str, actor_id: str, item_type: str, path: Optional[str] = None) -> bool:
"""Delete a specific type from a cache item for a user."""
async with pool.acquire() as conn:
if path is None:
result = await conn.execute(
"DELETE FROM item_types WHERE content_hash = $1 AND actor_id = $2 AND type = $3 AND path IS NULL",
content_hash, actor_id, item_type
"DELETE FROM item_types WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL",
cid, actor_id, item_type
)
else:
result = await conn.execute(
"DELETE FROM item_types WHERE content_hash = $1 AND actor_id = $2 AND type = $3 AND path = $4",
content_hash, actor_id, item_type, path
"DELETE FROM item_types WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path = $4",
cid, actor_id, item_type, path
)
return result == "DELETE 1"
@@ -362,11 +362,11 @@ async def list_items_by_type(item_type: str, actor_id: Optional[str] = None, lim
if actor_id:
rows = await conn.fetch(
"""
SELECT it.id, it.content_hash, it.actor_id, it.type, it.path, it.description,
SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.content_hash = ci.content_hash
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1 AND it.actor_id = $2
ORDER BY it.created_at DESC
LIMIT $3 OFFSET $4
@@ -376,11 +376,11 @@ async def list_items_by_type(item_type: str, actor_id: Optional[str] = None, lim
else:
rows = await conn.fetch(
"""
SELECT it.id, it.content_hash, it.actor_id, it.type, it.path, it.description,
SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.content_hash = ci.content_hash
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1
ORDER BY it.created_at DESC
LIMIT $2 OFFSET $3
@@ -396,11 +396,11 @@ async def get_item_by_path(item_type: str, path: str, actor_id: Optional[str] =
if actor_id:
row = await conn.fetchrow(
"""
SELECT it.id, it.content_hash, it.actor_id, it.type, it.path, it.description,
SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.content_hash = ci.content_hash
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1 AND it.path = $2 AND it.actor_id = $3
""",
item_type, path, actor_id
@@ -408,11 +408,11 @@ async def get_item_by_path(item_type: str, path: str, actor_id: Optional[str] =
else:
row = await conn.fetchrow(
"""
SELECT it.id, it.content_hash, it.actor_id, it.type, it.path, it.description,
SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.content_hash = ci.content_hash
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1 AND it.path = $2
""",
item_type, path
@@ -480,7 +480,7 @@ async def get_pin_reasons(item_type_id: int) -> List[dict]:
return [dict(row) for row in rows]
async def is_item_pinned(content_hash: str, item_type: Optional[str] = None) -> tuple[bool, List[str]]:
async def is_item_pinned(cid: str, item_type: Optional[str] = None) -> tuple[bool, List[str]]:
"""Check if any type of a cache item is pinned. Returns (is_pinned, reasons)."""
async with pool.acquire() as conn:
if item_type:
@@ -489,9 +489,9 @@ async def is_item_pinned(content_hash: str, item_type: Optional[str] = None) ->
SELECT pr.reason
FROM pin_reasons pr
JOIN item_types it ON pr.item_type_id = it.id
WHERE it.content_hash = $1 AND it.type = $2 AND it.pinned = TRUE
WHERE it.cid = $1 AND it.type = $2 AND it.pinned = TRUE
""",
content_hash, item_type
cid, item_type
)
else:
rows = await conn.fetch(
@@ -499,9 +499,9 @@ async def is_item_pinned(content_hash: str, item_type: Optional[str] = None) ->
SELECT pr.reason
FROM pin_reasons pr
JOIN item_types it ON pr.item_type_id = it.id
WHERE it.content_hash = $1 AND it.pinned = TRUE
WHERE it.cid = $1 AND it.pinned = TRUE
""",
content_hash
cid
)
reasons = [row["reason"] for row in rows]
return len(reasons) > 0, reasons
@@ -510,7 +510,7 @@ async def is_item_pinned(content_hash: str, item_type: Optional[str] = None) ->
# ============ L2 Shares ============
async def add_l2_share(
content_hash: str,
cid: str,
actor_id: str,
l2_server: str,
asset_name: str,
@@ -520,85 +520,85 @@ async def add_l2_share(
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO l2_shares (content_hash, actor_id, l2_server, asset_name, content_type, last_synced_at)
INSERT INTO l2_shares (cid, actor_id, l2_server, asset_name, content_type, last_synced_at)
VALUES ($1, $2, $3, $4, $5, NOW())
ON CONFLICT (content_hash, actor_id, l2_server, content_type) DO UPDATE SET
ON CONFLICT (cid, actor_id, l2_server, content_type) DO UPDATE SET
asset_name = $4,
last_synced_at = NOW()
RETURNING id, content_hash, actor_id, l2_server, asset_name, content_type, published_at, last_synced_at
RETURNING id, cid, actor_id, l2_server, asset_name, content_type, published_at, last_synced_at
""",
content_hash, actor_id, l2_server, asset_name, content_type
cid, actor_id, l2_server, asset_name, content_type
)
return dict(row)
async def get_l2_shares(content_hash: str, actor_id: Optional[str] = None) -> List[dict]:
async def get_l2_shares(cid: str, actor_id: Optional[str] = None) -> List[dict]:
"""Get L2 shares for a cache item, optionally filtered by user."""
async with pool.acquire() as conn:
if actor_id:
rows = await conn.fetch(
"""
SELECT id, content_hash, actor_id, l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
SELECT id, cid, actor_id, l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares
WHERE content_hash = $1 AND actor_id = $2
WHERE cid = $1 AND actor_id = $2
ORDER BY published_at
""",
content_hash, actor_id
cid, actor_id
)
else:
rows = await conn.fetch(
"""
SELECT id, content_hash, actor_id, l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
SELECT id, cid, actor_id, l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares
WHERE content_hash = $1
WHERE cid = $1
ORDER BY published_at
""",
content_hash
cid
)
return [dict(row) for row in rows]
async def delete_l2_share(content_hash: str, actor_id: str, l2_server: str, content_type: str) -> bool:
async def delete_l2_share(cid: str, actor_id: str, l2_server: str, content_type: str) -> bool:
"""Delete an L2 share for a user."""
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM l2_shares WHERE content_hash = $1 AND actor_id = $2 AND l2_server = $3 AND content_type = $4",
content_hash, actor_id, l2_server, content_type
"DELETE FROM l2_shares WHERE cid = $1 AND actor_id = $2 AND l2_server = $3 AND content_type = $4",
cid, actor_id, l2_server, content_type
)
return result == "DELETE 1"
# ============ Cache Item Cleanup ============
async def has_remaining_references(content_hash: str) -> bool:
async def has_remaining_references(cid: str) -> bool:
"""Check if a cache item has any remaining item_types or l2_shares."""
async with pool.acquire() as conn:
item_types_count = await conn.fetchval(
"SELECT COUNT(*) FROM item_types WHERE content_hash = $1",
content_hash
"SELECT COUNT(*) FROM item_types WHERE cid = $1",
cid
)
if item_types_count > 0:
return True
l2_shares_count = await conn.fetchval(
"SELECT COUNT(*) FROM l2_shares WHERE content_hash = $1",
content_hash
"SELECT COUNT(*) FROM l2_shares WHERE cid = $1",
cid
)
return l2_shares_count > 0
async def cleanup_orphaned_cache_item(content_hash: str) -> bool:
async def cleanup_orphaned_cache_item(cid: str) -> bool:
"""Delete a cache item if it has no remaining references. Returns True if deleted."""
async with pool.acquire() as conn:
# Only delete if no item_types or l2_shares reference it
result = await conn.execute(
"""
DELETE FROM cache_items
WHERE content_hash = $1
AND NOT EXISTS (SELECT 1 FROM item_types WHERE content_hash = $1)
AND NOT EXISTS (SELECT 1 FROM l2_shares WHERE content_hash = $1)
WHERE cid = $1
AND NOT EXISTS (SELECT 1 FROM item_types WHERE cid = $1)
AND NOT EXISTS (SELECT 1 FROM l2_shares WHERE cid = $1)
""",
content_hash
cid
)
return result == "DELETE 1"
@@ -610,7 +610,7 @@ import json as _json
async def save_item_metadata(
content_hash: str,
cid: str,
actor_id: str,
item_type: str = "media",
filename: Optional[str] = None,
@@ -643,16 +643,16 @@ async def save_item_metadata(
async with pool.acquire() as conn:
# Ensure cache_item exists
await conn.execute(
"INSERT INTO cache_items (content_hash) VALUES ($1) ON CONFLICT DO NOTHING",
content_hash
"INSERT INTO cache_items (cid) VALUES ($1) ON CONFLICT DO NOTHING",
cid
)
# Upsert item_type
row = await conn.fetchrow(
"""
INSERT INTO item_types (content_hash, actor_id, type, description, source_type, source_url, source_note, pinned, filename, metadata)
INSERT INTO item_types (cid, actor_id, type, description, source_type, source_url, source_note, pinned, filename, metadata)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
ON CONFLICT (content_hash, actor_id, type, path) DO UPDATE SET
ON CONFLICT (cid, actor_id, type, path) DO UPDATE SET
description = COALESCE(EXCLUDED.description, item_types.description),
source_type = COALESCE(EXCLUDED.source_type, item_types.source_type),
source_url = COALESCE(EXCLUDED.source_url, item_types.source_url),
@@ -660,9 +660,9 @@ async def save_item_metadata(
pinned = EXCLUDED.pinned,
filename = COALESCE(EXCLUDED.filename, item_types.filename),
metadata = item_types.metadata || EXCLUDED.metadata
RETURNING id, content_hash, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at
RETURNING id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at
""",
content_hash, actor_id, item_type, description, source_type, source_url, source_note, pinned, filename, _json.dumps(metadata)
cid, actor_id, item_type, description, source_type, source_url, source_note, pinned, filename, _json.dumps(metadata)
)
item_type_id = row["id"]
@@ -719,7 +719,7 @@ async def save_item_metadata(
return result
async def load_item_metadata(content_hash: str, actor_id: Optional[str] = None) -> dict:
async def load_item_metadata(cid: str, actor_id: Optional[str] = None) -> dict:
"""
Load item metadata from the database.
@@ -731,8 +731,8 @@ async def load_item_metadata(content_hash: str, actor_id: Optional[str] = None)
async with pool.acquire() as conn:
# Get cache item
cache_item = await conn.fetchrow(
"SELECT content_hash, ipfs_cid, created_at FROM cache_items WHERE content_hash = $1",
content_hash
"SELECT cid, ipfs_cid, created_at FROM cache_items WHERE cid = $1",
cid
)
if not cache_item:
@@ -743,19 +743,19 @@ async def load_item_metadata(content_hash: str, actor_id: Optional[str] = None)
item_types = await conn.fetch(
"""
SELECT id, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at
FROM item_types WHERE content_hash = $1 AND actor_id = $2
FROM item_types WHERE cid = $1 AND actor_id = $2
ORDER BY created_at
""",
content_hash, actor_id
cid, actor_id
)
else:
item_types = await conn.fetch(
"""
SELECT id, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at
FROM item_types WHERE content_hash = $1
FROM item_types WHERE cid = $1
ORDER BY created_at
""",
content_hash
cid
)
if not item_types:
@@ -807,17 +807,17 @@ async def load_item_metadata(content_hash: str, actor_id: Optional[str] = None)
shares = await conn.fetch(
"""
SELECT l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares WHERE content_hash = $1 AND actor_id = $2
FROM l2_shares WHERE cid = $1 AND actor_id = $2
""",
content_hash, actor_id
cid, actor_id
)
else:
shares = await conn.fetch(
"""
SELECT l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares WHERE content_hash = $1
FROM l2_shares WHERE cid = $1
""",
content_hash
cid
)
if shares:
@@ -845,7 +845,7 @@ async def load_item_metadata(content_hash: str, actor_id: Optional[str] = None)
async def update_item_metadata(
content_hash: str,
cid: str,
actor_id: str,
item_type: str = "media",
**updates
@@ -880,15 +880,15 @@ async def update_item_metadata(
existing = await conn.fetchrow(
"""
SELECT id, metadata FROM item_types
WHERE content_hash = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
""",
content_hash, actor_id, item_type
cid, actor_id, item_type
)
if not existing:
# Create new entry
return await save_item_metadata(
content_hash, actor_id, item_type,
cid, actor_id, item_type,
filename=filename, description=description,
source_type=source_type, source_url=source_url, source_note=source_note,
pinned=pinned or False, pin_reason=pin_reason,
@@ -898,7 +898,7 @@ async def update_item_metadata(
# Build update query dynamically
set_parts = []
params = [content_hash, actor_id, item_type]
params = [cid, actor_id, item_type]
param_idx = 4
if description is not None:
@@ -949,7 +949,7 @@ async def update_item_metadata(
if set_parts:
query = f"""
UPDATE item_types SET {', '.join(set_parts)}
WHERE content_hash = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
"""
await conn.execute(query, *params)
@@ -964,11 +964,11 @@ async def update_item_metadata(
existing["id"], pin_reason
)
return await load_item_metadata(content_hash, actor_id)
return await load_item_metadata(cid, actor_id)
async def save_l2_share(
content_hash: str,
cid: str,
actor_id: str,
l2_server: str,
asset_name: str,
@@ -979,15 +979,15 @@ async def save_l2_share(
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO l2_shares (content_hash, actor_id, l2_server, asset_name, activity_id, content_type, last_synced_at)
INSERT INTO l2_shares (cid, actor_id, l2_server, asset_name, activity_id, content_type, last_synced_at)
VALUES ($1, $2, $3, $4, $5, $6, NOW())
ON CONFLICT (content_hash, actor_id, l2_server, content_type) DO UPDATE SET
ON CONFLICT (cid, actor_id, l2_server, content_type) DO UPDATE SET
asset_name = EXCLUDED.asset_name,
activity_id = COALESCE(EXCLUDED.activity_id, l2_shares.activity_id),
last_synced_at = NOW()
RETURNING l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
""",
content_hash, actor_id, l2_server, asset_name, activity_id, content_type
cid, actor_id, l2_server, asset_name, activity_id, content_type
)
return {
"l2_server": row["l2_server"],
@@ -1000,19 +1000,19 @@ async def save_l2_share(
async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit: int = 100, offset: int = 0) -> List[dict]:
"""Get all items for a user, optionally filtered by type. Deduplicates by content_hash."""
"""Get all items for a user, optionally filtered by type. Deduplicates by cid."""
async with pool.acquire() as conn:
if item_type:
rows = await conn.fetch(
"""
SELECT * FROM (
SELECT DISTINCT ON (it.content_hash)
it.content_hash, it.type, it.description, it.filename, it.pinned, it.created_at,
SELECT DISTINCT ON (it.cid)
it.cid, it.type, it.description, it.filename, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.content_hash = ci.content_hash
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.actor_id = $1 AND it.type = $2
ORDER BY it.content_hash, it.created_at DESC
ORDER BY it.cid, it.created_at DESC
) deduped
ORDER BY created_at DESC
LIMIT $3 OFFSET $4
@@ -1023,13 +1023,13 @@ async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit:
rows = await conn.fetch(
"""
SELECT * FROM (
SELECT DISTINCT ON (it.content_hash)
it.content_hash, it.type, it.description, it.filename, it.pinned, it.created_at,
SELECT DISTINCT ON (it.cid)
it.cid, it.type, it.description, it.filename, it.pinned, it.created_at,
ci.ipfs_cid
FROM item_types it
JOIN cache_items ci ON it.content_hash = ci.content_hash
JOIN cache_items ci ON it.cid = ci.cid
WHERE it.actor_id = $1
ORDER BY it.content_hash, it.created_at DESC
ORDER BY it.cid, it.created_at DESC
) deduped
ORDER BY created_at DESC
LIMIT $2 OFFSET $3
@@ -1039,7 +1039,7 @@ async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit:
return [
{
"content_hash": r["content_hash"],
"cid": r["cid"],
"type": r["type"],
"description": r["description"],
"filename": r["filename"],
@@ -1052,16 +1052,16 @@ async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit:
async def count_user_items(actor_id: str, item_type: Optional[str] = None) -> int:
"""Count unique items (by content_hash) for a user."""
"""Count unique items (by cid) for a user."""
async with pool.acquire() as conn:
if item_type:
return await conn.fetchval(
"SELECT COUNT(DISTINCT content_hash) FROM item_types WHERE actor_id = $1 AND type = $2",
"SELECT COUNT(DISTINCT cid) FROM item_types WHERE actor_id = $1 AND type = $2",
actor_id, item_type
)
else:
return await conn.fetchval(
"SELECT COUNT(DISTINCT content_hash) FROM item_types WHERE actor_id = $1",
"SELECT COUNT(DISTINCT cid) FROM item_types WHERE actor_id = $1",
actor_id
)
@@ -1073,7 +1073,7 @@ async def get_run_cache(run_id: str) -> Optional[dict]:
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
SELECT run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
SELECT run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
FROM run_cache WHERE run_id = $1
""",
run_id
@@ -1081,7 +1081,7 @@ async def get_run_cache(run_id: str) -> Optional[dict]:
if row:
return {
"run_id": row["run_id"],
"output_hash": row["output_hash"],
"output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"],
"recipe": row["recipe"],
@@ -1094,7 +1094,7 @@ async def get_run_cache(run_id: str) -> Optional[dict]:
async def save_run_cache(
run_id: str,
output_hash: str,
output_cid: str,
recipe: str,
inputs: List[str],
ipfs_cid: Optional[str] = None,
@@ -1105,19 +1105,19 @@ async def save_run_cache(
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO run_cache (run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id)
INSERT INTO run_cache (run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id)
VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (run_id) DO UPDATE SET
output_hash = EXCLUDED.output_hash,
output_cid = EXCLUDED.output_cid,
ipfs_cid = COALESCE(EXCLUDED.ipfs_cid, run_cache.ipfs_cid),
provenance_cid = COALESCE(EXCLUDED.provenance_cid, run_cache.provenance_cid)
RETURNING run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
RETURNING run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
""",
run_id, output_hash, ipfs_cid, provenance_cid, recipe, _json.dumps(inputs), actor_id
run_id, output_cid, ipfs_cid, provenance_cid, recipe, _json.dumps(inputs), actor_id
)
return {
"run_id": row["run_id"],
"output_hash": row["output_hash"],
"output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"],
"recipe": row["recipe"],
@@ -1127,20 +1127,20 @@ async def save_run_cache(
}
async def get_run_by_output(output_hash: str) -> Optional[dict]:
async def get_run_by_output(output_cid: str) -> Optional[dict]:
"""Get run cache entry by output hash."""
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
SELECT run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
FROM run_cache WHERE output_hash = $1
SELECT run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
FROM run_cache WHERE output_cid = $1
""",
output_hash
output_cid
)
if row:
return {
"run_id": row["run_id"],
"output_hash": row["output_hash"],
"output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"],
"recipe": row["recipe"],
@@ -1173,7 +1173,7 @@ async def list_runs_by_actor(actor_id: str, offset: int = 0, limit: int = 20) ->
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
SELECT run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
FROM run_cache
WHERE actor_id = $1
ORDER BY created_at DESC
@@ -1184,7 +1184,7 @@ async def list_runs_by_actor(actor_id: str, offset: int = 0, limit: int = 20) ->
return [
{
"run_id": row["run_id"],
"output_hash": row["output_hash"],
"output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"],
"recipe": row["recipe"],
@@ -1348,7 +1348,7 @@ async def get_all_active_storage() -> List[dict]:
async def add_storage_pin(
content_hash: str,
cid: str,
storage_id: int,
ipfs_cid: Optional[str],
pin_type: str,
@@ -1358,40 +1358,40 @@ async def add_storage_pin(
async with pool.acquire() as conn:
try:
row = await conn.fetchrow(
"""INSERT INTO storage_pins (content_hash, storage_id, ipfs_cid, pin_type, size_bytes)
"""INSERT INTO storage_pins (cid, storage_id, ipfs_cid, pin_type, size_bytes)
VALUES ($1, $2, $3, $4, $5)
ON CONFLICT (content_hash, storage_id) DO UPDATE SET
ON CONFLICT (cid, storage_id) DO UPDATE SET
ipfs_cid = EXCLUDED.ipfs_cid,
pin_type = EXCLUDED.pin_type,
size_bytes = EXCLUDED.size_bytes,
pinned_at = NOW()
RETURNING id""",
content_hash, storage_id, ipfs_cid, pin_type, size_bytes
cid, storage_id, ipfs_cid, pin_type, size_bytes
)
return row["id"] if row else None
except Exception:
return None
async def remove_storage_pin(content_hash: str, storage_id: int) -> bool:
async def remove_storage_pin(cid: str, storage_id: int) -> bool:
"""Remove a pin record."""
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM storage_pins WHERE content_hash = $1 AND storage_id = $2",
content_hash, storage_id
"DELETE FROM storage_pins WHERE cid = $1 AND storage_id = $2",
cid, storage_id
)
return "DELETE 1" in result
async def get_pins_for_content(content_hash: str) -> List[dict]:
async def get_pins_for_content(cid: str) -> List[dict]:
"""Get all storage locations where content is pinned."""
async with pool.acquire() as conn:
rows = await conn.fetch(
"""SELECT sp.*, sb.provider_type, sb.provider_name, sb.actor_id
FROM storage_pins sp
JOIN storage_backends sb ON sp.storage_id = sb.id
WHERE sp.content_hash = $1""",
content_hash
WHERE sp.cid = $1""",
cid
)
return [dict(row) for row in rows]

View File

@@ -120,21 +120,21 @@ class SourceExecutor(Executor):
"""Executor for SOURCE nodes - loads content from cache by CID."""
def execute(self, config: Dict, inputs: List[Path], output_path: Path) -> Path:
# Source nodes load from cache by content_hash
content_hash = config.get("content_hash")
if not content_hash:
raise ValueError("SOURCE node requires content_hash in config")
# Source nodes load from cache by cid
cid = config.get("cid")
if not cid:
raise ValueError("SOURCE node requires cid in config")
# Look up in cache
source_path = CACHE_DIR / content_hash
source_path = CACHE_DIR / cid
if not source_path.exists():
# Try nodes directory
from cache_manager import get_cache_manager
cache_manager = get_cache_manager()
source_path = cache_manager.get_by_content_hash(content_hash)
source_path = cache_manager.get_by_cid(cid)
if not source_path or not source_path.exists():
raise ValueError(f"Source content not in cache: {content_hash}")
raise ValueError(f"Source content not in cache: {cid}")
# For source nodes, we just return the path (no transformation)
# The engine will use this as input to subsequent nodes
@@ -186,7 +186,7 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
# Input comes from cache by hash (supports both legacy and new cache locations)
cache_manager = get_cache_manager()
input_path = cache_manager.get_by_content_hash(input_hash)
input_path = cache_manager.get_by_cid(input_hash)
if not input_path or not input_path.exists():
raise ValueError(f"Input not in cache: {input_hash}")
@@ -214,9 +214,9 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
raise ValueError(f"Unknown effect: {effect_name}")
# Verify output
output_hash = file_hash(result)
if output_hash != expected_hash:
raise ValueError(f"Output hash mismatch: expected {expected_hash}, got {output_hash}")
output_cid = file_hash(result)
if output_cid != expected_hash:
raise ValueError(f"Output hash mismatch: expected {expected_hash}, got {output_cid}")
# Build effect info based on source
if effect_name == "identity":
@@ -224,7 +224,7 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
artdag_commit = get_artdag_commit()
effect_info = {
"name": f"effect:{effect_name}",
"content_hash": REGISTRY[f"effect:{effect_name}"]["hash"],
"cid": REGISTRY[f"effect:{effect_name}"]["hash"],
"repo": "github",
"repo_commit": artdag_commit,
"repo_url": f"https://github.com/gilesbradshaw/art-dag/blob/{artdag_commit}/artdag/nodes/effect.py"
@@ -234,7 +234,7 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
effects_commit = get_effects_commit()
effect_info = {
"name": f"effect:{effect_name}",
"content_hash": REGISTRY[f"effect:{effect_name}"]["hash"],
"cid": REGISTRY[f"effect:{effect_name}"]["hash"],
"repo": "rose-ash",
"repo_commit": effects_commit,
"repo_url": f"https://git.rose-ash.com/art-dag/effects/src/commit/{effects_commit}/{effect_name}"
@@ -247,15 +247,15 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
"rendered_by": "@giles@artdag.rose-ash.com",
"output": {
"name": output_name,
"content_hash": output_hash,
"cid": output_cid,
},
"inputs": [
{"content_hash": input_hash}
{"cid": input_hash}
],
"effects": [effect_info],
"infrastructure": {
"software": {"name": "infra:artdag", "content_hash": REGISTRY["infra:artdag"]["hash"]},
"hardware": {"name": "infra:giles-hp", "content_hash": REGISTRY["infra:giles-hp"]["hash"]}
"software": {"name": "infra:artdag", "cid": REGISTRY["infra:artdag"]["hash"]},
"hardware": {"name": "infra:giles-hp", "cid": REGISTRY["infra:giles-hp"]["hash"]}
}
}
@@ -329,10 +329,10 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
if not result.success:
raise RuntimeError(f"DAG execution failed: {result.error}")
# Index all node outputs by content_hash and upload to IPFS
# Index all node outputs by cid and upload to IPFS
cache_manager = get_cache_manager()
output_hash = None
node_hashes = {} # node_id -> content_hash mapping
output_cid = None
node_hashes = {} # node_id -> cid mapping
node_ipfs_cids = {} # node_id -> ipfs_cid mapping
# Process all node results (intermediates + output)
@@ -341,9 +341,9 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
node = dag.nodes.get(node_id)
# Skip SOURCE nodes - they're already in cache
if node and (node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE"):
content_hash = node.config.get("content_hash")
if content_hash:
node_hashes[node_id] = content_hash
cid = node.config.get("cid")
if cid:
node_hashes[node_id] = cid
continue
# Determine node type for cache metadata
@@ -353,20 +353,20 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
else:
cache_node_type = "dag_intermediate"
# Store in cache_manager (indexes by content_hash, uploads to IPFS)
# Store in cache_manager (indexes by cid, uploads to IPFS)
cached, ipfs_cid = cache_manager.put(
Path(node_path),
node_type=cache_node_type,
node_id=node_id,
)
node_hashes[node_id] = cached.content_hash
node_hashes[node_id] = cached.cid
if ipfs_cid:
node_ipfs_cids[node_id] = ipfs_cid
logger.info(f"Cached node {node_id}: {cached.content_hash[:16]}... -> {ipfs_cid or 'no IPFS'}")
logger.info(f"Cached node {node_id}: {cached.cid[:16]}... -> {ipfs_cid or 'no IPFS'}")
# Get output CID from the output node
if result.output_path and result.output_path.exists():
output_hash = file_hash(result.output_path)
output_cid = file_hash(result.output_path)
output_ipfs_cid = node_ipfs_cids.get(dag.output_id)
# Store output in database (for L2 to query IPFS CID)
@@ -376,14 +376,14 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
async def save_to_db():
if database.pool is None:
await database.init_db()
await database.create_cache_item(output_hash, output_ipfs_cid)
await database.create_cache_item(output_cid, output_ipfs_cid)
# Also save the run result
if run_id:
input_hashes_for_db = [
node.config.get("content_hash")
node.config.get("cid")
for node in dag.nodes.values()
if (node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE")
and node.config.get("content_hash")
and node.config.get("cid")
]
# Get actor_id and recipe from pending_runs (saved when run started)
actor_id = None
@@ -395,7 +395,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
await database.save_run_cache(
run_id=run_id,
output_hash=output_hash,
output_cid=output_cid,
recipe=recipe_name,
inputs=input_hashes_for_db,
ipfs_cid=output_ipfs_cid,
@@ -405,7 +405,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
# Save output as media for the user
if actor_id:
await database.save_item_metadata(
content_hash=output_hash,
cid=output_cid,
actor_id=actor_id,
item_type="media",
description=f"Output from recipe: {recipe_name}",
@@ -431,9 +431,9 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
intermediate_hashes = []
for node_id, node in dag.nodes.items():
if node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE":
content_hash = node.config.get("content_hash")
if content_hash:
input_hashes.append(content_hash)
cid = node.config.get("cid")
if cid:
input_hashes.append(cid)
elif node_id != dag.output_id and node_id in node_hashes:
intermediate_hashes.append(node_hashes[node_id])
@@ -441,9 +441,9 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
from artdag.activities import Activity
from datetime import datetime, timezone
activity = Activity(
activity_id=run_id or f"dag-{output_hash[:16]}",
activity_id=run_id or f"dag-{output_cid[:16]}",
input_ids=sorted(input_hashes),
output_id=output_hash,
output_id=output_cid,
intermediate_ids=intermediate_hashes,
created_at=datetime.now(timezone.utc).timestamp(),
status="completed",
@@ -454,23 +454,23 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
input_hashes_for_provenance = []
for node_id, node in dag.nodes.items():
if node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE":
content_hash = node.config.get("content_hash")
if content_hash:
input_hashes_for_provenance.append({"content_hash": content_hash})
cid = node.config.get("cid")
if cid:
input_hashes_for_provenance.append({"cid": cid})
provenance = {
"task_id": self.request.id,
"run_id": run_id,
"rendered_at": datetime.now(timezone.utc).isoformat(),
"output": {
"content_hash": output_hash,
"cid": output_cid,
"ipfs_cid": node_ipfs_cids.get(dag.output_id) if dag.output_id else None,
},
"inputs": input_hashes_for_provenance,
"dag": dag_json, # Full DAG definition
"nodes": {
node_id: {
"content_hash": node_hashes.get(node_id),
"cid": node_hashes.get(node_id),
"ipfs_cid": node_ipfs_cids.get(node_id),
}
for node_id in dag.nodes.keys()
@@ -496,7 +496,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
return {
"success": True,
"run_id": run_id,
"output_hash": output_hash,
"output_cid": output_cid,
"output_ipfs_cid": node_ipfs_cids.get(dag.output_id) if dag.output_id else None,
"output_path": str(result.output_path) if result.output_path else None,
"execution_time": result.execution_time,
@@ -505,7 +505,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
"node_results": {
node_id: str(path) for node_id, path in result.node_results.items()
},
"node_hashes": node_hashes, # node_id -> content_hash
"node_hashes": node_hashes, # node_id -> cid
"node_ipfs_cids": node_ipfs_cids, # node_id -> ipfs_cid
"provenance_cid": provenance_cid,
}
@@ -526,10 +526,10 @@ def build_effect_dag(input_hashes: List[str], effect_name: str) -> DAG:
# Add source nodes for each input
source_ids = []
for i, content_hash in enumerate(input_hashes):
for i, cid in enumerate(input_hashes):
source_node = Node(
node_type=NodeType.SOURCE,
config={"content_hash": content_hash},
config={"cid": cid},
name=f"source_{i}",
)
dag.add_node(source_node)

File diff suppressed because it is too large Load Diff

View File

@@ -27,12 +27,12 @@ class StorageProvider(ABC):
provider_type: str = "unknown"
@abstractmethod
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""
Pin content to storage.
Args:
content_hash: SHA3-256 hash of the content
cid: IPFS CID of the content
data: Raw bytes to store
filename: Optional filename hint
@@ -42,12 +42,12 @@ class StorageProvider(ABC):
pass
@abstractmethod
async def unpin(self, content_hash: str) -> bool:
async def unpin(self, cid: str) -> bool:
"""
Unpin content from storage.
Args:
content_hash: SHA3-256 hash of the content
cid: IPFS CID of the content
Returns:
True if unpinned successfully
@@ -55,12 +55,12 @@ class StorageProvider(ABC):
pass
@abstractmethod
async def get(self, content_hash: str) -> Optional[bytes]:
async def get(self, cid: str) -> Optional[bytes]:
"""
Retrieve content from storage.
Args:
content_hash: SHA3-256 hash of the content
cid: IPFS CID of the content
Returns:
Raw bytes or None if not found
@@ -68,7 +68,7 @@ class StorageProvider(ABC):
pass
@abstractmethod
async def is_pinned(self, content_hash: str) -> bool:
async def is_pinned(self, cid: str) -> bool:
"""Check if content is pinned in this storage."""
pass
@@ -111,16 +111,16 @@ class PinataProvider(StorageProvider):
"pinata_secret_api_key": self.secret_key,
}
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Pin content to Pinata."""
try:
import asyncio
def do_pin():
files = {"file": (filename or f"{content_hash[:16]}.bin", data)}
files = {"file": (filename or f"{cid[:16]}.bin", data)}
metadata = {
"name": filename or content_hash[:16],
"keyvalues": {"content_hash": content_hash}
"name": filename or cid[:16],
"keyvalues": {"cid": cid}
}
response = requests.post(
f"{self.base_url}/pinning/pinFileToIPFS",
@@ -133,22 +133,22 @@ class PinataProvider(StorageProvider):
return response.json().get("IpfsHash")
cid = await asyncio.to_thread(do_pin)
logger.info(f"Pinata: Pinned {content_hash[:16]}... as {cid}")
logger.info(f"Pinata: Pinned {cid[:16]}... as {cid}")
return cid
except Exception as e:
logger.error(f"Pinata pin failed: {e}")
return None
async def unpin(self, content_hash: str) -> bool:
async def unpin(self, cid: str) -> bool:
"""Unpin content from Pinata by finding its CID first."""
try:
import asyncio
def do_unpin():
# First find the pin by content_hash metadata
# First find the pin by cid metadata
response = requests.get(
f"{self.base_url}/data/pinList",
params={"metadata[keyvalues][content_hash]": content_hash, "status": "pinned"},
params={"metadata[keyvalues][cid]": cid, "status": "pinned"},
headers=self._headers(),
timeout=30
)
@@ -171,13 +171,13 @@ class PinataProvider(StorageProvider):
return True
result = await asyncio.to_thread(do_unpin)
logger.info(f"Pinata: Unpinned {content_hash[:16]}...")
logger.info(f"Pinata: Unpinned {cid[:16]}...")
return result
except Exception as e:
logger.error(f"Pinata unpin failed: {e}")
return False
async def get(self, content_hash: str) -> Optional[bytes]:
async def get(self, cid: str) -> Optional[bytes]:
"""Get content from Pinata via IPFS gateway."""
try:
import asyncio
@@ -186,7 +186,7 @@ class PinataProvider(StorageProvider):
# First find the CID
response = requests.get(
f"{self.base_url}/data/pinList",
params={"metadata[keyvalues][content_hash]": content_hash, "status": "pinned"},
params={"metadata[keyvalues][cid]": cid, "status": "pinned"},
headers=self._headers(),
timeout=30
)
@@ -213,7 +213,7 @@ class PinataProvider(StorageProvider):
logger.error(f"Pinata get failed: {e}")
return None
async def is_pinned(self, content_hash: str) -> bool:
async def is_pinned(self, cid: str) -> bool:
"""Check if content is pinned on Pinata."""
try:
import asyncio
@@ -221,7 +221,7 @@ class PinataProvider(StorageProvider):
def do_check():
response = requests.get(
f"{self.base_url}/data/pinList",
params={"metadata[keyvalues][content_hash]": content_hash, "status": "pinned"},
params={"metadata[keyvalues][cid]": cid, "status": "pinned"},
headers=self._headers(),
timeout=30
)
@@ -286,7 +286,7 @@ class Web3StorageProvider(StorageProvider):
def _headers(self) -> dict:
return {"Authorization": f"Bearer {self.api_token}"}
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Pin content to web3.storage."""
try:
import asyncio
@@ -297,7 +297,7 @@ class Web3StorageProvider(StorageProvider):
data=data,
headers={
**self._headers(),
"X-Name": filename or content_hash[:16]
"X-Name": filename or cid[:16]
},
timeout=120
)
@@ -305,24 +305,24 @@ class Web3StorageProvider(StorageProvider):
return response.json().get("cid")
cid = await asyncio.to_thread(do_pin)
logger.info(f"web3.storage: Pinned {content_hash[:16]}... as {cid}")
logger.info(f"web3.storage: Pinned {cid[:16]}... as {cid}")
return cid
except Exception as e:
logger.error(f"web3.storage pin failed: {e}")
return None
async def unpin(self, content_hash: str) -> bool:
async def unpin(self, cid: str) -> bool:
"""web3.storage doesn't support unpinning - data is stored permanently."""
logger.warning("web3.storage: Unpinning not supported (permanent storage)")
return False
async def get(self, content_hash: str) -> Optional[bytes]:
async def get(self, cid: str) -> Optional[bytes]:
"""Get content from web3.storage - would need CID mapping."""
# web3.storage requires knowing the CID to fetch
# For now, return None - we'd need to maintain a mapping
return None
async def is_pinned(self, content_hash: str) -> bool:
async def is_pinned(self, cid: str) -> bool:
"""Check if content is pinned - would need CID mapping."""
return False
@@ -383,7 +383,7 @@ class NFTStorageProvider(StorageProvider):
def _headers(self) -> dict:
return {"Authorization": f"Bearer {self.api_token}"}
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Pin content to NFT.Storage."""
try:
import asyncio
@@ -399,22 +399,22 @@ class NFTStorageProvider(StorageProvider):
return response.json().get("value", {}).get("cid")
cid = await asyncio.to_thread(do_pin)
logger.info(f"NFT.Storage: Pinned {content_hash[:16]}... as {cid}")
logger.info(f"NFT.Storage: Pinned {cid[:16]}... as {cid}")
return cid
except Exception as e:
logger.error(f"NFT.Storage pin failed: {e}")
return None
async def unpin(self, content_hash: str) -> bool:
async def unpin(self, cid: str) -> bool:
"""NFT.Storage doesn't support unpinning - data is stored permanently."""
logger.warning("NFT.Storage: Unpinning not supported (permanent storage)")
return False
async def get(self, content_hash: str) -> Optional[bytes]:
async def get(self, cid: str) -> Optional[bytes]:
"""Get content from NFT.Storage - would need CID mapping."""
return None
async def is_pinned(self, content_hash: str) -> bool:
async def is_pinned(self, cid: str) -> bool:
"""Check if content is pinned - would need CID mapping."""
return False
@@ -459,13 +459,13 @@ class InfuraIPFSProvider(StorageProvider):
def _auth(self) -> tuple:
return (self.project_id, self.project_secret)
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Pin content to Infura IPFS."""
try:
import asyncio
def do_pin():
files = {"file": (filename or f"{content_hash[:16]}.bin", data)}
files = {"file": (filename or f"{cid[:16]}.bin", data)}
response = requests.post(
f"{self.base_url}/add",
files=files,
@@ -476,13 +476,13 @@ class InfuraIPFSProvider(StorageProvider):
return response.json().get("Hash")
cid = await asyncio.to_thread(do_pin)
logger.info(f"Infura IPFS: Pinned {content_hash[:16]}... as {cid}")
logger.info(f"Infura IPFS: Pinned {cid[:16]}... as {cid}")
return cid
except Exception as e:
logger.error(f"Infura IPFS pin failed: {e}")
return None
async def unpin(self, content_hash: str) -> bool:
async def unpin(self, cid: str) -> bool:
"""Unpin content from Infura IPFS."""
try:
import asyncio
@@ -490,7 +490,7 @@ class InfuraIPFSProvider(StorageProvider):
def do_unpin():
response = requests.post(
f"{self.base_url}/pin/rm",
params={"arg": content_hash},
params={"arg": cid},
auth=self._auth(),
timeout=30
)
@@ -502,7 +502,7 @@ class InfuraIPFSProvider(StorageProvider):
logger.error(f"Infura IPFS unpin failed: {e}")
return False
async def get(self, content_hash: str) -> Optional[bytes]:
async def get(self, cid: str) -> Optional[bytes]:
"""Get content from Infura IPFS gateway."""
try:
import asyncio
@@ -510,7 +510,7 @@ class InfuraIPFSProvider(StorageProvider):
def do_get():
response = requests.post(
f"{self.base_url}/cat",
params={"arg": content_hash},
params={"arg": cid},
auth=self._auth(),
timeout=120
)
@@ -522,7 +522,7 @@ class InfuraIPFSProvider(StorageProvider):
logger.error(f"Infura IPFS get failed: {e}")
return None
async def is_pinned(self, content_hash: str) -> bool:
async def is_pinned(self, cid: str) -> bool:
"""Check if content is pinned on Infura IPFS."""
try:
import asyncio
@@ -530,7 +530,7 @@ class InfuraIPFSProvider(StorageProvider):
def do_check():
response = requests.post(
f"{self.base_url}/pin/ls",
params={"arg": content_hash},
params={"arg": cid},
auth=self._auth(),
timeout=30
)
@@ -579,7 +579,7 @@ class FilebaseProvider(StorageProvider):
self.capacity_bytes = capacity_gb * 1024**3
self.endpoint = "https://s3.filebase.com"
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Pin content to Filebase."""
try:
import asyncio
@@ -594,20 +594,20 @@ class FilebaseProvider(StorageProvider):
aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4')
)
key = filename or f"{content_hash[:16]}.bin"
key = filename or f"{cid[:16]}.bin"
s3.put_object(Bucket=self.bucket, Key=key, Body=data)
# Get CID from response headers
head = s3.head_object(Bucket=self.bucket, Key=key)
return head.get('Metadata', {}).get('cid', content_hash)
return head.get('Metadata', {}).get('cid', cid)
cid = await asyncio.to_thread(do_pin)
logger.info(f"Filebase: Pinned {content_hash[:16]}... as {cid}")
logger.info(f"Filebase: Pinned {cid[:16]}... as {cid}")
return cid
except Exception as e:
logger.error(f"Filebase pin failed: {e}")
return None
async def unpin(self, content_hash: str) -> bool:
async def unpin(self, cid: str) -> bool:
"""Remove content from Filebase."""
try:
import asyncio
@@ -622,7 +622,7 @@ class FilebaseProvider(StorageProvider):
aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4')
)
s3.delete_object(Bucket=self.bucket, Key=content_hash)
s3.delete_object(Bucket=self.bucket, Key=cid)
return True
return await asyncio.to_thread(do_unpin)
@@ -630,7 +630,7 @@ class FilebaseProvider(StorageProvider):
logger.error(f"Filebase unpin failed: {e}")
return False
async def get(self, content_hash: str) -> Optional[bytes]:
async def get(self, cid: str) -> Optional[bytes]:
"""Get content from Filebase."""
try:
import asyncio
@@ -645,7 +645,7 @@ class FilebaseProvider(StorageProvider):
aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4')
)
response = s3.get_object(Bucket=self.bucket, Key=content_hash)
response = s3.get_object(Bucket=self.bucket, Key=cid)
return response['Body'].read()
return await asyncio.to_thread(do_get)
@@ -653,7 +653,7 @@ class FilebaseProvider(StorageProvider):
logger.error(f"Filebase get failed: {e}")
return None
async def is_pinned(self, content_hash: str) -> bool:
async def is_pinned(self, cid: str) -> bool:
"""Check if content exists in Filebase."""
try:
import asyncio
@@ -668,7 +668,7 @@ class FilebaseProvider(StorageProvider):
aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4')
)
s3.head_object(Bucket=self.bucket, Key=content_hash)
s3.head_object(Bucket=self.bucket, Key=cid)
return True
return await asyncio.to_thread(do_check)
@@ -718,7 +718,7 @@ class StorjProvider(StorageProvider):
self.capacity_bytes = capacity_gb * 1024**3
self.endpoint = "https://gateway.storjshare.io"
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Store content on Storj."""
try:
import asyncio
@@ -733,18 +733,18 @@ class StorjProvider(StorageProvider):
aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4')
)
key = filename or content_hash
key = filename or cid
s3.put_object(Bucket=self.bucket, Key=key, Body=data)
return content_hash
return cid
result = await asyncio.to_thread(do_pin)
logger.info(f"Storj: Stored {content_hash[:16]}...")
logger.info(f"Storj: Stored {cid[:16]}...")
return result
except Exception as e:
logger.error(f"Storj pin failed: {e}")
return None
async def unpin(self, content_hash: str) -> bool:
async def unpin(self, cid: str) -> bool:
"""Remove content from Storj."""
try:
import asyncio
@@ -759,7 +759,7 @@ class StorjProvider(StorageProvider):
aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4')
)
s3.delete_object(Bucket=self.bucket, Key=content_hash)
s3.delete_object(Bucket=self.bucket, Key=cid)
return True
return await asyncio.to_thread(do_unpin)
@@ -767,7 +767,7 @@ class StorjProvider(StorageProvider):
logger.error(f"Storj unpin failed: {e}")
return False
async def get(self, content_hash: str) -> Optional[bytes]:
async def get(self, cid: str) -> Optional[bytes]:
"""Get content from Storj."""
try:
import asyncio
@@ -782,7 +782,7 @@ class StorjProvider(StorageProvider):
aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4')
)
response = s3.get_object(Bucket=self.bucket, Key=content_hash)
response = s3.get_object(Bucket=self.bucket, Key=cid)
return response['Body'].read()
return await asyncio.to_thread(do_get)
@@ -790,7 +790,7 @@ class StorjProvider(StorageProvider):
logger.error(f"Storj get failed: {e}")
return None
async def is_pinned(self, content_hash: str) -> bool:
async def is_pinned(self, cid: str) -> bool:
"""Check if content exists on Storj."""
try:
import asyncio
@@ -805,7 +805,7 @@ class StorjProvider(StorageProvider):
aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4')
)
s3.head_object(Bucket=self.bucket, Key=content_hash)
s3.head_object(Bucket=self.bucket, Key=cid)
return True
return await asyncio.to_thread(do_check)
@@ -854,37 +854,37 @@ class LocalStorageProvider(StorageProvider):
# Create directory if it doesn't exist
self.base_path.mkdir(parents=True, exist_ok=True)
def _get_file_path(self, content_hash: str) -> Path:
def _get_file_path(self, cid: str) -> Path:
"""Get file path for a CID (using subdirectories)."""
# Use first 2 chars as subdirectory for better filesystem performance
subdir = content_hash[:2]
return self.base_path / subdir / content_hash
subdir = cid[:2]
return self.base_path / subdir / cid
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Store content locally."""
try:
import asyncio
def do_store():
file_path = self._get_file_path(content_hash)
file_path = self._get_file_path(cid)
file_path.parent.mkdir(parents=True, exist_ok=True)
file_path.write_bytes(data)
return content_hash # Use content_hash as ID for local storage
return cid # Use cid as ID for local storage
result = await asyncio.to_thread(do_store)
logger.info(f"Local: Stored {content_hash[:16]}...")
logger.info(f"Local: Stored {cid[:16]}...")
return result
except Exception as e:
logger.error(f"Local storage failed: {e}")
return None
async def unpin(self, content_hash: str) -> bool:
async def unpin(self, cid: str) -> bool:
"""Remove content from local storage."""
try:
import asyncio
def do_remove():
file_path = self._get_file_path(content_hash)
file_path = self._get_file_path(cid)
if file_path.exists():
file_path.unlink()
return True
@@ -895,13 +895,13 @@ class LocalStorageProvider(StorageProvider):
logger.error(f"Local unpin failed: {e}")
return False
async def get(self, content_hash: str) -> Optional[bytes]:
async def get(self, cid: str) -> Optional[bytes]:
"""Get content from local storage."""
try:
import asyncio
def do_get():
file_path = self._get_file_path(content_hash)
file_path = self._get_file_path(cid)
if file_path.exists():
return file_path.read_bytes()
return None
@@ -911,9 +911,9 @@ class LocalStorageProvider(StorageProvider):
logger.error(f"Local get failed: {e}")
return None
async def is_pinned(self, content_hash: str) -> bool:
async def is_pinned(self, cid: str) -> bool:
"""Check if content exists in local storage."""
return self._get_file_path(content_hash).exists()
return self._get_file_path(cid).exists()
async def test_connection(self) -> tuple[bool, str]:
"""Test local storage is writable."""

View File

@@ -81,8 +81,8 @@ def execute_step(
# Get L1 cache manager (IPFS-backed)
cache_mgr = get_cache_manager()
# Check if already cached (by cache_id as content_hash)
cached_path = cache_mgr.get_by_content_hash(step.cache_id)
# Check if already cached (by cache_id as cid)
cached_path = cache_mgr.get_by_cid(step.cache_id)
if cached_path:
logger.info(f"Step {step.step_id} already cached at {cached_path}")
@@ -141,14 +141,14 @@ def execute_step(
try:
# Handle SOURCE nodes
if step.node_type == "SOURCE":
content_hash = step.config.get("content_hash")
if not content_hash:
raise ValueError(f"SOURCE step missing content_hash")
cid = step.config.get("cid")
if not cid:
raise ValueError(f"SOURCE step missing cid")
# Look up in cache
path = cache_mgr.get_by_content_hash(content_hash)
path = cache_mgr.get_by_cid(cid)
if not path:
raise ValueError(f"SOURCE input not found in cache: {content_hash[:16]}...")
raise ValueError(f"SOURCE input not found in cache: {cid[:16]}...")
output_path = str(path)
complete_task(step.cache_id, worker_id, output_path)
@@ -165,7 +165,7 @@ def execute_step(
for item_id in step.config.get("items", []):
item_cache_id = input_cache_ids.get(item_id)
if item_cache_id:
path = cache_mgr.get_by_content_hash(item_cache_id)
path = cache_mgr.get_by_cid(item_cache_id)
if path:
item_paths.append(str(path))
@@ -190,7 +190,7 @@ def execute_step(
input_cache_id = input_cache_ids.get(input_step_id)
if not input_cache_id:
raise ValueError(f"No cache_id for input step: {input_step_id}")
path = cache_mgr.get_by_content_hash(input_cache_id)
path = cache_mgr.get_by_cid(input_cache_id)
if not path:
raise ValueError(f"Input not in cache: {input_cache_id[:16]}...")
input_paths.append(Path(path))
@@ -276,7 +276,7 @@ def execute_step(
"step_id": step.step_id,
"cache_id": step.cache_id,
"output_path": str(cached_file.path),
"content_hash": cached_file.content_hash,
"cid": cached_file.cid,
"ipfs_cid": ipfs_cid,
"filter_count": len(filter_chain),
}
@@ -298,7 +298,7 @@ def execute_step(
if not input_cache_id:
raise ValueError(f"No cache_id for input step: {input_step_id}")
path = cache_mgr.get_by_content_hash(input_cache_id)
path = cache_mgr.get_by_cid(input_cache_id)
if not path:
raise ValueError(f"Input not in cache: {input_cache_id[:16]}...")
@@ -336,7 +336,7 @@ def execute_step(
"media_type": output_def.media_type,
"index": output_def.index,
"path": str(cached_file.path),
"content_hash": cached_file.content_hash,
"cid": cached_file.cid,
"ipfs_cid": ipfs_cid,
})
else:
@@ -347,7 +347,7 @@ def execute_step(
"media_type": "video/mp4",
"index": 0,
"path": str(cached_file.path),
"content_hash": cached_file.content_hash,
"cid": cached_file.cid,
"ipfs_cid": ipfs_cid,
})
@@ -362,7 +362,7 @@ def execute_step(
"name": step.name,
"cache_id": step.cache_id,
"output_path": str(cached_file.path),
"content_hash": cached_file.content_hash,
"cid": cached_file.cid,
"ipfs_cid": ipfs_cid,
"outputs": outputs,
}

View File

@@ -140,7 +140,7 @@ def execute_step_sexp(
cache_mgr = get_cache_manager()
# Check if already cached
cached_path = cache_mgr.get_by_content_hash(cache_id)
cached_path = cache_mgr.get_by_cid(cache_id)
if cached_path:
logger.info(f"Step {step_id} already cached at {cached_path}")
@@ -202,7 +202,7 @@ def execute_step_sexp(
if not content_id:
raise ValueError("SOURCE step missing :cid or :hash")
path = cache_mgr.get_by_content_hash(content_id)
path = cache_mgr.get_by_cid(content_id)
if not path:
raise ValueError(f"SOURCE input not found: {content_id[:16]}...")
@@ -226,7 +226,7 @@ def execute_step_sexp(
input_paths = []
for inp in inputs:
inp_cache_id = input_cache_ids.get(inp, inp)
path = cache_mgr.get_by_content_hash(inp_cache_id)
path = cache_mgr.get_by_cid(inp_cache_id)
if not path:
raise ValueError(f"Input not found: {inp_cache_id[:16]}...")
input_paths.append(Path(path))
@@ -261,7 +261,7 @@ def execute_step_sexp(
input_paths = []
for inp in inputs:
inp_cache_id = input_cache_ids.get(inp, inp)
path = cache_mgr.get_by_content_hash(inp_cache_id)
path = cache_mgr.get_by_cid(inp_cache_id)
if not path:
raise ValueError(f"Input not found: {inp_cache_id[:16]}...")
input_paths.append(Path(path))
@@ -366,7 +366,7 @@ def execute_step_sexp(
"step_id": step_id,
"cache_id": cache_id,
"output_path": str(cached_file.path),
"content_hash": cached_file.content_hash,
"cid": cached_file.cid,
"ipfs_cid": ipfs_cid,
"filter_count": len(filter_chain),
}
@@ -386,7 +386,7 @@ def execute_step_sexp(
input_paths = []
for inp in inputs:
inp_cache_id = input_cache_ids.get(inp, inp)
path = cache_mgr.get_by_content_hash(inp_cache_id)
path = cache_mgr.get_by_cid(inp_cache_id)
if not path:
raise ValueError(f"Input not found: {inp_cache_id[:16]}...")
input_paths.append(Path(path))
@@ -420,7 +420,7 @@ def execute_step_sexp(
"step_id": step_id,
"cache_id": cache_id,
"output_path": str(cached_file.path),
"content_hash": cached_file.content_hash,
"cid": cached_file.cid,
"ipfs_cid": ipfs_cid,
}

View File

@@ -80,8 +80,8 @@ def run_plan(
cache_ids[step.step_id] = step.cache_id
# Also map input hashes
for name, content_hash in plan.input_hashes.items():
cache_ids[name] = content_hash
for name, cid in plan.input_hashes.items():
cache_ids[name] = cid
# Group steps by level
steps_by_level = plan.get_steps_by_level()
@@ -103,7 +103,7 @@ def run_plan(
for step in level_steps:
# Check if cached
cached_path = cache_mgr.get_by_content_hash(step.cache_id)
cached_path = cache_mgr.get_by_cid(step.cache_id)
if cached_path:
results_by_step[step.step_id] = {
"status": "cached",
@@ -171,7 +171,7 @@ def run_plan(
output_name = plan.output_name
if output_cache_id:
output_path = cache_mgr.get_by_content_hash(output_cache_id)
output_path = cache_mgr.get_by_cid(output_cache_id)
output_ipfs_cid = cache_mgr.get_ipfs_cid(output_cache_id)
# Build list of all outputs with their names and artifacts
@@ -183,7 +183,7 @@ def run_plan(
# If no outputs in result, build from step definition
if not step_outputs and step.outputs:
for output_def in step.outputs:
output_cache_path = cache_mgr.get_by_content_hash(output_def.cache_id)
output_cache_path = cache_mgr.get_by_cid(output_def.cache_id)
output_ipfs = cache_mgr.get_ipfs_cid(output_def.cache_id) if output_cache_path else None
all_outputs.append({
"name": output_def.name,
@@ -318,28 +318,28 @@ def run_recipe(
node_id = analysis_node["node_id"]
# Resolve input reference to CID
content_hash = input_hashes.get(input_ref)
if not content_hash:
cid = input_hashes.get(input_ref)
if not cid:
logger.warning(f"Analysis node {node_id}: input '{input_ref}' not in input_hashes")
continue
path = cache_mgr.get_by_content_hash(content_hash)
path = cache_mgr.get_by_cid(cid)
if not path:
logger.warning(f"Analysis node {node_id}: content {content_hash[:16]}... not in cache")
logger.warning(f"Analysis node {node_id}: content {cid[:16]}... not in cache")
continue
try:
# Run analysis for the specific feature
features = [feature] if feature else ["beats", "energy"]
result = analyzer.analyze(
input_hash=content_hash,
input_hash=cid,
features=features,
input_path=Path(path),
)
# Store result keyed by node_id so plan can reference it
analysis_results[node_id] = result
# Also store by content_hash for compatibility
analysis_results[content_hash] = result
# Also store by cid for compatibility
analysis_results[cid] = result
logger.info(f"Analysis {node_id}: feature={feature}, tempo={result.tempo}")
except Exception as e:
logger.warning(f"Analysis failed for {node_id}: {e}")
@@ -380,7 +380,7 @@ def run_recipe(
# Store in cache (content-addressed, auto-pins to IPFS)
# Plan is just another node output - no special treatment needed
cached, plan_ipfs_cid = cache_mgr.put(tmp_path, node_type="plan", move=True)
logger.info(f"Plan cached: hash={cached.content_hash}, ipfs={plan_ipfs_cid}")
logger.info(f"Plan cached: hash={cached.cid}, ipfs={plan_ipfs_cid}")
# Phase 4: Execute
logger.info("Phase 4: Executing plan...")
@@ -392,7 +392,7 @@ def run_recipe(
"run_id": run_id,
"recipe": compiled.name,
"plan_id": plan.plan_id,
"plan_cache_id": cached.content_hash,
"plan_cache_id": cached.cid,
"plan_ipfs_cid": plan_ipfs_cid,
"output_path": result.get("output_path"),
"output_cache_id": result.get("output_cache_id"),
@@ -454,21 +454,21 @@ def generate_plan(
feature = analysis_node["feature"]
node_id = analysis_node["node_id"]
content_hash = input_hashes.get(input_ref)
if not content_hash:
cid = input_hashes.get(input_ref)
if not cid:
continue
path = cache_mgr.get_by_content_hash(content_hash)
path = cache_mgr.get_by_cid(cid)
if path:
try:
features = [feature] if feature else ["beats", "energy"]
result = analyzer.analyze(
input_hash=content_hash,
input_hash=cid,
features=features,
input_path=Path(path),
)
analysis_results[node_id] = result
analysis_results[content_hash] = result
analysis_results[cid] = result
except Exception as e:
logger.warning(f"Analysis failed for {node_id}: {e}")

View File

@@ -67,7 +67,7 @@ def register_input_cid(
input_path: Local path to the input file
Returns:
Dict with 'cid' and 'content_hash'
Dict with 'status', 'cid', and 'path'
"""
import hashlib
@@ -77,7 +77,7 @@ def register_input_cid(
# Compute content hash
with open(path, "rb") as f:
content_hash = hashlib.sha3_256(f.read()).hexdigest()
cid = hashlib.sha3_256(f.read()).hexdigest()
# Add to IPFS
cid = ipfs_client.add_file(path)
@@ -89,7 +89,7 @@ def register_input_cid(
return {
"status": "completed",
"cid": cid,
"content_hash": content_hash,
"cid": cid,
"path": str(path),
}
@@ -426,7 +426,7 @@ def run_from_local(
return {"status": "failed", "phase": "register_input", "input": name, "error": result.get("error")}
input_cids[name] = result["cid"]
input_hashes[name] = result["content_hash"]
input_hashes[name] = result["cid"]
# Run the pipeline
return run_recipe_cid.apply_async(

View File

@@ -130,13 +130,13 @@ class TestL2SharedChecker:
class TestL1CacheManagerStorage:
"""Tests for cache storage operations."""
def test_put_and_get_by_content_hash(self, manager, temp_dir):
def test_put_and_get_by_cid(self, manager, temp_dir):
"""Can store and retrieve by content hash."""
test_file = create_test_file(temp_dir / "input.txt", "hello world")
cached = manager.put(test_file, node_type="test")
retrieved_path = manager.get_by_content_hash(cached.content_hash)
retrieved_path = manager.get_by_cid(cached.cid)
assert retrieved_path is not None
assert retrieved_path.read_text() == "hello world"
@@ -155,7 +155,7 @@ class TestL1CacheManagerStorage:
cached = manager.put(test_file, node_type="test")
assert manager.has_content(cached.content_hash) is True
assert manager.has_content(cached.cid) is True
assert manager.has_content("nonexistent") is False
def test_list_all(self, manager, temp_dir):
@@ -177,7 +177,7 @@ class TestL1CacheManagerStorage:
cached1 = manager.put(f1, node_type="test")
cached2 = manager.put(f2, node_type="test")
assert cached1.content_hash == cached2.content_hash
assert cached1.cid == cached2.cid
assert len(manager.list_all()) == 1
@@ -193,14 +193,14 @@ class TestL1CacheManagerActivities:
output_cached = manager.put(output_file, node_type="effect")
activity = manager.record_simple_activity(
input_hashes=[input_cached.content_hash],
output_hash=output_cached.content_hash,
input_hashes=[input_cached.cid],
output_cid=output_cached.cid,
run_id="run-001",
)
assert activity.activity_id == "run-001"
assert input_cached.content_hash in activity.input_ids
assert activity.output_id == output_cached.content_hash
assert input_cached.cid in activity.input_ids
assert activity.output_id == output_cached.cid
def test_list_activities(self, manager, temp_dir):
"""Can list all activities."""
@@ -209,7 +209,7 @@ class TestL1CacheManagerActivities:
out = create_test_file(temp_dir / f"out{i}.txt", f"output{i}")
inp_c = manager.put(inp, node_type="source")
out_c = manager.put(out, node_type="effect")
manager.record_simple_activity([inp_c.content_hash], out_c.content_hash)
manager.record_simple_activity([inp_c.cid], out_c.cid)
activities = manager.list_activities()
assert len(activities) == 3
@@ -225,10 +225,10 @@ class TestL1CacheManagerActivities:
out1_c = manager.put(out1, node_type="effect")
out2_c = manager.put(out2, node_type="effect")
manager.record_simple_activity([input_cached.content_hash], out1_c.content_hash, "run1")
manager.record_simple_activity([input_cached.content_hash], out2_c.content_hash, "run2")
manager.record_simple_activity([input_cached.cid], out1_c.cid, "run1")
manager.record_simple_activity([input_cached.cid], out2_c.cid, "run2")
found = manager.find_activities_by_inputs([input_cached.content_hash])
found = manager.find_activities_by_inputs([input_cached.cid])
assert len(found) == 2
@@ -240,7 +240,7 @@ class TestL1CacheManagerDeletionRules:
test_file = create_test_file(temp_dir / "orphan.txt", "orphan")
cached = manager.put(test_file, node_type="test")
can_delete, reason = manager.can_delete(cached.content_hash)
can_delete, reason = manager.can_delete(cached.cid)
assert can_delete is True
def test_cannot_delete_activity_input(self, manager, temp_dir):
@@ -252,11 +252,11 @@ class TestL1CacheManagerDeletionRules:
output_cached = manager.put(output_file, node_type="effect")
manager.record_simple_activity(
[input_cached.content_hash],
output_cached.content_hash,
[input_cached.cid],
output_cached.cid,
)
can_delete, reason = manager.can_delete(input_cached.content_hash)
can_delete, reason = manager.can_delete(input_cached.cid)
assert can_delete is False
assert "input" in reason.lower()
@@ -269,11 +269,11 @@ class TestL1CacheManagerDeletionRules:
output_cached = manager.put(output_file, node_type="effect")
manager.record_simple_activity(
[input_cached.content_hash],
output_cached.content_hash,
[input_cached.cid],
output_cached.cid,
)
can_delete, reason = manager.can_delete(output_cached.content_hash)
can_delete, reason = manager.can_delete(output_cached.cid)
assert can_delete is False
assert "output" in reason.lower()
@@ -283,9 +283,9 @@ class TestL1CacheManagerDeletionRules:
cached = manager.put(test_file, node_type="test")
# Mark as pinned (published)
manager.pin(cached.content_hash, reason="published")
manager.pin(cached.cid, reason="published")
can_delete, reason = manager.can_delete(cached.content_hash)
can_delete, reason = manager.can_delete(cached.cid)
assert can_delete is False
assert "pinned" in reason
@@ -294,10 +294,10 @@ class TestL1CacheManagerDeletionRules:
test_file = create_test_file(temp_dir / "orphan.txt", "orphan")
cached = manager.put(test_file, node_type="test")
success, msg = manager.delete_by_content_hash(cached.content_hash)
success, msg = manager.delete_by_cid(cached.cid)
assert success is True
assert manager.has_content(cached.content_hash) is False
assert manager.has_content(cached.cid) is False
def test_delete_protected_item_fails(self, manager, temp_dir):
"""Cannot delete protected items."""
@@ -308,14 +308,14 @@ class TestL1CacheManagerDeletionRules:
output_cached = manager.put(output_file, node_type="effect")
manager.record_simple_activity(
[input_cached.content_hash],
output_cached.content_hash,
[input_cached.cid],
output_cached.cid,
)
success, msg = manager.delete_by_content_hash(input_cached.content_hash)
success, msg = manager.delete_by_cid(input_cached.cid)
assert success is False
assert manager.has_content(input_cached.content_hash) is True
assert manager.has_content(input_cached.cid) is True
class TestL1CacheManagerActivityDiscard:
@@ -330,8 +330,8 @@ class TestL1CacheManagerActivityDiscard:
output_cached = manager.put(output_file, node_type="effect")
activity = manager.record_simple_activity(
[input_cached.content_hash],
output_cached.content_hash,
[input_cached.cid],
output_cached.cid,
"run-001",
)
@@ -347,13 +347,13 @@ class TestL1CacheManagerActivityDiscard:
output_cached = manager.put(output_file, node_type="effect")
manager.record_simple_activity(
[input_cached.content_hash],
output_cached.content_hash,
[input_cached.cid],
output_cached.cid,
"run-001",
)
# Mark output as pinned (published)
manager.pin(output_cached.content_hash, reason="published")
manager.pin(output_cached.cid, reason="published")
can_discard, reason = manager.can_discard_activity("run-001")
assert can_discard is False
@@ -368,8 +368,8 @@ class TestL1CacheManagerActivityDiscard:
output_cached = manager.put(output_file, node_type="effect")
manager.record_simple_activity(
[input_cached.content_hash],
output_cached.content_hash,
[input_cached.cid],
output_cached.cid,
"run-001",
)