Rename content_hash/output_hash to cid throughout

Refactor to use IPFS CID as the primary content identifier:
- Update database schema: content_hash -> cid, output_hash -> output_cid
- Update all services, routers, and tasks to use cid terminology
- Update HTML templates to display CID instead of hash
- Update cache_manager parameter names
- Update README documentation

This completes the transition to CID-only content addressing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-12 08:02:44 +00:00
parent 494a2a8650
commit 92d26b2b72
22 changed files with 981 additions and 988 deletions

View File

@@ -5,7 +5,7 @@ L1 rendering server for the Art DAG system. Manages distributed rendering jobs v
## Features ## Features
- **3-Phase Execution**: Analyze → Plan → Execute pipeline for recipe-based rendering - **3-Phase Execution**: Analyze → Plan → Execute pipeline for recipe-based rendering
- **Content-Addressable Caching**: SHA3-256 hashed content with deduplication - **Content-Addressable Caching**: IPFS CIDs with deduplication
- **IPFS Integration**: Optional IPFS-primary mode for distributed storage - **IPFS Integration**: Optional IPFS-primary mode for distributed storage
- **Storage Providers**: S3, IPFS, and local storage backends - **Storage Providers**: S3, IPFS, and local storage backends
- **DAG Visualization**: Interactive graph visualization of execution plans - **DAG Visualization**: Interactive graph visualization of execution plans
@@ -130,13 +130,13 @@ Interactive docs: http://localhost:8100/docs
| Method | Path | Description | | Method | Path | Description |
|--------|------|-------------| |--------|------|-------------|
| GET | `/cache/{hash}` | Get cached content (with preview) | | GET | `/cache/{cid}` | Get cached content (with preview) |
| GET | `/cache/{hash}/raw` | Download raw content | | GET | `/cache/{cid}/raw` | Download raw content |
| GET | `/cache/{hash}/mp4` | Get MP4 video | | GET | `/cache/{cid}/mp4` | Get MP4 video |
| GET | `/cache/{hash}/meta` | Get content metadata | | GET | `/cache/{cid}/meta` | Get content metadata |
| PATCH | `/cache/{hash}/meta` | Update metadata | | PATCH | `/cache/{cid}/meta` | Update metadata |
| POST | `/cache/{hash}/publish` | Publish to L2 | | POST | `/cache/{cid}/publish` | Publish to L2 |
| DELETE | `/cache/{hash}` | Delete from cache | | DELETE | `/cache/{cid}` | Delete from cache |
| POST | `/cache/import?path=` | Import local file | | POST | `/cache/import?path=` | Import local file |
| POST | `/cache/upload` | Upload file | | POST | `/cache/upload` | Upload file |
| GET | `/media` | Browse media gallery | | GET | `/media` | Browse media gallery |
@@ -185,7 +185,7 @@ Recipes are executed in three phases:
### Phase 1: Analyze ### Phase 1: Analyze
Extract features from input files: Extract features from input files:
- **Audio/Video**: Tempo, beat times, energy levels - **Audio/Video**: Tempo, beat times, energy levels
- Results cached by content hash - Results cached by CID
### Phase 2: Plan ### Phase 2: Plan
Generate an execution plan: Generate an execution plan:
@@ -237,7 +237,7 @@ output: sync_video
### Local Cache ### Local Cache
- Location: `~/.artdag/cache/` (or `CACHE_DIR`) - Location: `~/.artdag/cache/` (or `CACHE_DIR`)
- Content-addressed by SHA3-256 hash - Content-addressed by IPFS CID
- Subdirectories: `plans/`, `analysis/` - Subdirectories: `plans/`, `analysis/`
### Redis ### Redis
@@ -318,12 +318,12 @@ Every render produces a provenance record:
"task_id": "celery-task-uuid", "task_id": "celery-task-uuid",
"rendered_at": "2026-01-07T...", "rendered_at": "2026-01-07T...",
"rendered_by": "@giles@artdag.rose-ash.com", "rendered_by": "@giles@artdag.rose-ash.com",
"output": {"name": "...", "content_hash": "..."}, "output": {"name": "...", "cid": "Qm..."},
"inputs": [...], "inputs": [...],
"effects": [...], "effects": [...],
"infrastructure": { "infrastructure": {
"software": {"name": "infra:artdag", "content_hash": "..."}, "software": {"name": "infra:artdag", "cid": "Qm..."},
"hardware": {"name": "infra:giles-hp", "content_hash": "..."} "hardware": {"name": "infra:giles-hp", "cid": "Qm..."}
} }
} }
``` ```

View File

@@ -155,13 +155,13 @@ async def run_recipe_endpoint(
# Check if already completed # Check if already completed
cached = await database.get_run_cache(run_id) cached = await database.get_run_cache(run_id)
if cached: if cached:
output_hash = cached.get("output_hash") output_cid = cached.get("output_cid")
if cache.has_content(output_hash): if cache.has_content(output_cid):
return { return {
"status": "completed", "status": "completed",
"run_id": run_id, "run_id": run_id,
"output_hash": output_hash, "output_cid": output_cid,
"output_ipfs_cid": cache.get_ipfs_cid(output_hash), "output_ipfs_cid": cache.get_ipfs_cid(output_cid),
"cached": True, "cached": True,
} }
@@ -224,7 +224,7 @@ async def get_run_status(
if result.successful(): if result.successful():
task_result = result.get() task_result = result.get()
data["status"] = task_result.get("status", "completed") data["status"] = task_result.get("status", "completed")
data["output_hash"] = task_result.get("output_cache_id") data["output_cid"] = task_result.get("output_cache_id")
data["output_ipfs_cid"] = task_result.get("output_ipfs_cid") data["output_ipfs_cid"] = task_result.get("output_ipfs_cid")
data["total_steps"] = task_result.get("total_steps") data["total_steps"] = task_result.get("total_steps")
data["cached"] = task_result.get("cached") data["cached"] = task_result.get("cached")
@@ -250,7 +250,7 @@ async def get_run_status(
return { return {
"run_id": run_id, "run_id": run_id,
"status": "completed", "status": "completed",
"output_hash": cached.get("output_hash"), "output_cid": cached.get("output_cid"),
"cached": True, "cached": True,
} }

View File

@@ -40,9 +40,9 @@ def get_cache_service():
return CacheService(database, get_cache_manager()) return CacheService(database, get_cache_manager())
@router.get("/{content_hash}") @router.get("/{cid}")
async def get_cached( async def get_cached(
content_hash: str, cid: str,
request: Request, request: Request,
cache_service: CacheService = Depends(get_cache_service), cache_service: CacheService = Depends(get_cache_service),
): ):
@@ -50,16 +50,16 @@ async def get_cached(
auth_service = AuthService(get_redis_client()) auth_service = AuthService(get_redis_client())
ctx = auth_service.get_user_from_cookie(request) ctx = auth_service.get_user_from_cookie(request)
cache_item = await cache_service.get_cache_item(content_hash) cache_item = await cache_service.get_cache_item(cid)
if not cache_item: if not cache_item:
if wants_html(request): if wants_html(request):
templates = get_templates(request) templates = get_templates(request)
return render(templates, "cache/not_found.html", request, return render(templates, "cache/not_found.html", request,
content_hash=content_hash, cid=cid,
user=ctx, user=ctx,
active_tab="media", active_tab="media",
) )
raise HTTPException(404, f"Content {content_hash} not in cache") raise HTTPException(404, f"Content {cid} not in cache")
# JSON response # JSON response
if wants_json(request): if wants_json(request):
@@ -71,7 +71,7 @@ async def get_cached(
return RedirectResponse(url="/auth", status_code=302) return RedirectResponse(url="/auth", status_code=302)
# Check access # Check access
has_access = await cache_service.check_access(content_hash, ctx.actor_id, ctx.username) has_access = await cache_service.check_access(cid, ctx.actor_id, ctx.username)
if not has_access: if not has_access:
raise HTTPException(403, "Access denied") raise HTTPException(403, "Access denied")
@@ -83,27 +83,27 @@ async def get_cached(
) )
@router.get("/{content_hash}/raw") @router.get("/{cid}/raw")
async def get_cached_raw( async def get_cached_raw(
content_hash: str, cid: str,
cache_service: CacheService = Depends(get_cache_service), cache_service: CacheService = Depends(get_cache_service),
): ):
"""Get raw cached content (file download).""" """Get raw cached content (file download)."""
file_path, media_type, filename = await cache_service.get_raw_file(content_hash) file_path, media_type, filename = await cache_service.get_raw_file(cid)
if not file_path: if not file_path:
raise HTTPException(404, f"Content {content_hash} not in cache") raise HTTPException(404, f"Content {cid} not in cache")
return FileResponse(file_path, media_type=media_type, filename=filename) return FileResponse(file_path, media_type=media_type, filename=filename)
@router.get("/{content_hash}/mp4") @router.get("/{cid}/mp4")
async def get_cached_mp4( async def get_cached_mp4(
content_hash: str, cid: str,
cache_service: CacheService = Depends(get_cache_service), cache_service: CacheService = Depends(get_cache_service),
): ):
"""Get cached content as MP4 (transcodes MKV on first request).""" """Get cached content as MP4 (transcodes MKV on first request)."""
mp4_path, error = await cache_service.get_as_mp4(content_hash) mp4_path, error = await cache_service.get_as_mp4(cid)
if error: if error:
raise HTTPException(400 if "not a video" in error else 404, error) raise HTTPException(400 if "not a video" in error else 404, error)
@@ -111,29 +111,29 @@ async def get_cached_mp4(
return FileResponse(mp4_path, media_type="video/mp4") return FileResponse(mp4_path, media_type="video/mp4")
@router.get("/{content_hash}/meta") @router.get("/{cid}/meta")
async def get_metadata( async def get_metadata(
content_hash: str, cid: str,
ctx: UserContext = Depends(require_auth), ctx: UserContext = Depends(require_auth),
cache_service: CacheService = Depends(get_cache_service), cache_service: CacheService = Depends(get_cache_service),
): ):
"""Get content metadata.""" """Get content metadata."""
meta = await cache_service.get_metadata(content_hash, ctx.actor_id) meta = await cache_service.get_metadata(cid, ctx.actor_id)
if meta is None: if meta is None:
raise HTTPException(404, "Content not found") raise HTTPException(404, "Content not found")
return meta return meta
@router.patch("/{content_hash}/meta") @router.patch("/{cid}/meta")
async def update_metadata( async def update_metadata(
content_hash: str, cid: str,
req: UpdateMetadataRequest, req: UpdateMetadataRequest,
ctx: UserContext = Depends(require_auth), ctx: UserContext = Depends(require_auth),
cache_service: CacheService = Depends(get_cache_service), cache_service: CacheService = Depends(get_cache_service),
): ):
"""Update content metadata.""" """Update content metadata."""
success, error = await cache_service.update_metadata( success, error = await cache_service.update_metadata(
content_hash=content_hash, cid=cid,
actor_id=ctx.actor_id, actor_id=ctx.actor_id,
title=req.title, title=req.title,
description=req.description, description=req.description,
@@ -147,16 +147,16 @@ async def update_metadata(
return {"updated": True} return {"updated": True}
@router.post("/{content_hash}/publish") @router.post("/{cid}/publish")
async def publish_content( async def publish_content(
content_hash: str, cid: str,
request: Request, request: Request,
ctx: UserContext = Depends(require_auth), ctx: UserContext = Depends(require_auth),
cache_service: CacheService = Depends(get_cache_service), cache_service: CacheService = Depends(get_cache_service),
): ):
"""Publish content to L2 and IPFS.""" """Publish content to L2 and IPFS."""
ipfs_cid, error = await cache_service.publish_to_l2( ipfs_cid, error = await cache_service.publish_to_l2(
content_hash=content_hash, cid=cid,
actor_id=ctx.actor_id, actor_id=ctx.actor_id,
l2_server=ctx.l2_server, l2_server=ctx.l2_server,
auth_token=request.cookies.get("auth_token"), auth_token=request.cookies.get("auth_token"),
@@ -173,14 +173,14 @@ async def publish_content(
return {"ipfs_cid": ipfs_cid, "published": True} return {"ipfs_cid": ipfs_cid, "published": True}
@router.delete("/{content_hash}") @router.delete("/{cid}")
async def delete_content( async def delete_content(
content_hash: str, cid: str,
ctx: UserContext = Depends(require_auth), ctx: UserContext = Depends(require_auth),
cache_service: CacheService = Depends(get_cache_service), cache_service: CacheService = Depends(get_cache_service),
): ):
"""Delete content from cache.""" """Delete content from cache."""
success, error = await cache_service.delete_content(content_hash, ctx.actor_id) success, error = await cache_service.delete_content(cid, ctx.actor_id)
if error: if error:
raise HTTPException(400 if "Cannot" in error or "pinned" in error else 404, error) raise HTTPException(400 if "Cannot" in error or "pinned" in error else 404, error)
@@ -195,12 +195,12 @@ async def import_from_ipfs(
cache_service: CacheService = Depends(get_cache_service), cache_service: CacheService = Depends(get_cache_service),
): ):
"""Import content from IPFS.""" """Import content from IPFS."""
content_hash, error = await cache_service.import_from_ipfs(ipfs_cid, ctx.actor_id) cid, error = await cache_service.import_from_ipfs(ipfs_cid, ctx.actor_id)
if error: if error:
raise HTTPException(400, error) raise HTTPException(400, error)
return {"content_hash": content_hash, "imported": True} return {"cid": cid, "imported": True}
@router.post("/upload") @router.post("/upload")
@@ -211,7 +211,7 @@ async def upload_content(
): ):
"""Upload content to cache and IPFS.""" """Upload content to cache and IPFS."""
content = await file.read() content = await file.read()
content_hash, ipfs_cid, error = await cache_service.upload_content( cid, ipfs_cid, error = await cache_service.upload_content(
content=content, content=content,
filename=file.filename, filename=file.filename,
actor_id=ctx.actor_id, actor_id=ctx.actor_id,
@@ -222,7 +222,7 @@ async def upload_content(
return { return {
"cid": ipfs_cid, "cid": ipfs_cid,
"content_hash": content_hash, # Legacy, for backwards compatibility "content_hash": cid,  # Legacy alias, for backwards compatibility (NOTE: must not be "cid" — that key is already used for ipfs_cid above and a duplicate key would silently overwrite it)
"filename": file.filename, "filename": file.filename,
"size": len(content), "size": len(content),
"uploaded": True, "uploaded": True,
@@ -272,9 +272,9 @@ async def list_media(
# HTMX metadata form # HTMX metadata form
@router.get("/{content_hash}/meta-form", response_class=HTMLResponse) @router.get("/{cid}/meta-form", response_class=HTMLResponse)
async def get_metadata_form( async def get_metadata_form(
content_hash: str, cid: str,
request: Request, request: Request,
cache_service: CacheService = Depends(get_cache_service), cache_service: CacheService = Depends(get_cache_service),
): ):
@@ -285,11 +285,11 @@ async def get_metadata_form(
if not ctx: if not ctx:
return HTMLResponse('<div class="text-red-400">Login required</div>') return HTMLResponse('<div class="text-red-400">Login required</div>')
meta = await cache_service.get_metadata(content_hash, ctx.actor_id) meta = await cache_service.get_metadata(cid, ctx.actor_id)
return HTMLResponse(f''' return HTMLResponse(f'''
<h2 class="text-lg font-semibold mb-4">Metadata</h2> <h2 class="text-lg font-semibold mb-4">Metadata</h2>
<form hx-patch="/cache/{content_hash}/meta" <form hx-patch="/cache/{cid}/meta"
hx-target="#metadata-section" hx-target="#metadata-section"
hx-swap="innerHTML" hx-swap="innerHTML"
class="space-y-4"> class="space-y-4">
@@ -312,9 +312,9 @@ async def get_metadata_form(
''') ''')
@router.patch("/{content_hash}/meta", response_class=HTMLResponse) @router.patch("/{cid}/meta", response_class=HTMLResponse)
async def update_metadata_htmx( async def update_metadata_htmx(
content_hash: str, cid: str,
request: Request, request: Request,
cache_service: CacheService = Depends(get_cache_service), cache_service: CacheService = Depends(get_cache_service),
): ):
@@ -328,7 +328,7 @@ async def update_metadata_htmx(
form_data = await request.form() form_data = await request.form()
success, error = await cache_service.update_metadata( success, error = await cache_service.update_metadata(
content_hash=content_hash, cid=cid,
actor_id=ctx.actor_id, actor_id=ctx.actor_id,
title=form_data.get("title"), title=form_data.get("title"),
description=form_data.get("description"), description=form_data.get("description"),

View File

@@ -350,7 +350,7 @@ async def run_recipe(
if node.get("type") == "SOURCE" and "asset" in config: if node.get("type") == "SOURCE" and "asset" in config:
asset_name = config["asset"] asset_name = config["asset"]
if asset_name in assets: if asset_name in assets:
config["content_hash"] = assets[asset_name].get("hash") config["cid"] = assets[asset_name].get("hash")
# Resolve effect references for EFFECT nodes # Resolve effect references for EFFECT nodes
if node.get("type") == "EFFECT" and "effect" in config: if node.get("type") == "EFFECT" and "effect" in config:
@@ -392,21 +392,21 @@ async def run_recipe(
input_name_to_node[node["name"].replace("-", "_")] = node_id input_name_to_node[node["name"].replace("-", "_")] = node_id
# Map user-provided input names to content hashes (for variable inputs) # Map user-provided input names to CIDs (for variable inputs)
for input_name, content_hash in req.inputs.items(): for input_name, cid in req.inputs.items():
# Try direct node ID match first # Try direct node ID match first
if input_name in nodes: if input_name in nodes:
node = nodes[input_name] node = nodes[input_name]
if node.get("node_type") == "SOURCE": if node.get("node_type") == "SOURCE":
if "config" not in node: if "config" not in node:
node["config"] = {} node["config"] = {}
node["config"]["content_hash"] = content_hash node["config"]["cid"] = cid
# Try input name lookup # Try input name lookup
elif input_name in input_name_to_node: elif input_name in input_name_to_node:
node_id = input_name_to_node[input_name] node_id = input_name_to_node[input_name]
node = nodes[node_id] node = nodes[node_id]
if "config" not in node: if "config" not in node:
node["config"] = {} node["config"] = {}
node["config"]["content_hash"] = content_hash node["config"]["cid"] = cid
# Transform output to output_id # Transform output to output_id
if "output" in dag_copy: if "output" in dag_copy:
@@ -527,7 +527,7 @@ async def publish_recipe(
# Use cache service to publish (recipes are stored in cache) # Use cache service to publish (recipes are stored in cache)
cache_service = CacheService(database, get_cache_manager()) cache_service = CacheService(database, get_cache_manager())
ipfs_cid, error = await cache_service.publish_to_l2( ipfs_cid, error = await cache_service.publish_to_l2(
content_hash=recipe_id, cid=recipe_id,
actor_id=ctx.actor_id, actor_id=ctx.actor_id,
l2_server=ctx.l2_server, l2_server=ctx.l2_server,
auth_token=request.cookies.get("auth_token"), auth_token=request.cookies.get("auth_token"),

View File

@@ -99,7 +99,7 @@ class RunStatus(BaseModel):
output_name: Optional[str] = None output_name: Optional[str] = None
created_at: Optional[str] = None created_at: Optional[str] = None
completed_at: Optional[str] = None completed_at: Optional[str] = None
output_hash: Optional[str] = None output_cid: Optional[str] = None
username: Optional[str] = None username: Optional[str] = None
provenance_cid: Optional[str] = None provenance_cid: Optional[str] = None
celery_task_id: Optional[str] = None celery_task_id: Optional[str] = None
@@ -244,13 +244,13 @@ async def get_run(
# Build artifacts list from output and inputs # Build artifacts list from output and inputs
artifacts = [] artifacts = []
output_media_type = None output_media_type = None
if run.get("output_hash"): if run.get("output_cid"):
# Detect media type using magic bytes # Detect media type using magic bytes
output_hash = run["output_hash"] output_cid = run["output_cid"]
media_type = None media_type = None
try: try:
from ..services.run_service import detect_media_type from ..services.run_service import detect_media_type
cache_path = get_cache_manager().get_by_content_hash(output_hash) cache_path = get_cache_manager().get_by_cid(output_cid)
if cache_path and cache_path.exists(): if cache_path and cache_path.exists():
simple_type = detect_media_type(cache_path) simple_type = detect_media_type(cache_path)
media_type = type_to_mime(simple_type) media_type = type_to_mime(simple_type)
@@ -258,7 +258,7 @@ async def get_run(
except Exception: except Exception:
pass pass
artifacts.append({ artifacts.append({
"hash": output_hash, "hash": output_cid,
"step_name": "Output", "step_name": "Output",
"media_type": media_type or "application/octet-stream", "media_type": media_type or "application/octet-stream",
}) })
@@ -271,7 +271,7 @@ async def get_run(
for i, input_hash in enumerate(run["inputs"]): for i, input_hash in enumerate(run["inputs"]):
media_type = None media_type = None
try: try:
cache_path = cache_manager.get_by_content_hash(input_hash) cache_path = cache_manager.get_by_cid(input_hash)
if cache_path and cache_path.exists(): if cache_path and cache_path.exists():
simple_type = detect_media_type(cache_path) simple_type = detect_media_type(cache_path)
media_type = type_to_mime(simple_type) media_type = type_to_mime(simple_type)
@@ -393,9 +393,9 @@ async def list_runs(
for run in runs: for run in runs:
# Add output media info # Add output media info
if run.get("output_hash"): if run.get("output_cid"):
try: try:
cache_path = cache_manager.get_by_content_hash(run["output_hash"]) cache_path = cache_manager.get_by_cid(run["output_cid"])
if cache_path and cache_path.exists(): if cache_path and cache_path.exists():
simple_type = detect_media_type(cache_path) simple_type = detect_media_type(cache_path)
run["output_media_type"] = type_to_mime(simple_type) run["output_media_type"] = type_to_mime(simple_type)
@@ -409,7 +409,7 @@ async def list_runs(
for input_hash in inputs[:3]: for input_hash in inputs[:3]:
preview = {"hash": input_hash, "media_type": None} preview = {"hash": input_hash, "media_type": None}
try: try:
cache_path = cache_manager.get_by_content_hash(input_hash) cache_path = cache_manager.get_by_cid(input_hash)
if cache_path and cache_path.exists(): if cache_path and cache_path.exists():
simple_type = detect_media_type(cache_path) simple_type = detect_media_type(cache_path)
preview["media_type"] = type_to_mime(simple_type) preview["media_type"] = type_to_mime(simple_type)
@@ -756,8 +756,8 @@ async def publish_run(
raise HTTPException(404, "Run not found") raise HTTPException(404, "Run not found")
# Check if run has output # Check if run has output
output_hash = run.get("output_hash") output_cid = run.get("output_cid")
if not output_hash: if not output_cid:
error = "Run has no output to publish" error = "Run has no output to publish"
if wants_html(request): if wants_html(request):
return HTMLResponse(f'<span class="text-red-400">{error}</span>') return HTMLResponse(f'<span class="text-red-400">{error}</span>')
@@ -766,7 +766,7 @@ async def publish_run(
# Use cache service to publish the output # Use cache service to publish the output
cache_service = CacheService(database, get_cache_manager()) cache_service = CacheService(database, get_cache_manager())
ipfs_cid, error = await cache_service.publish_to_l2( ipfs_cid, error = await cache_service.publish_to_l2(
content_hash=output_hash, cid=output_cid,
actor_id=ctx.actor_id, actor_id=ctx.actor_id,
l2_server=ctx.l2_server, l2_server=ctx.l2_server,
auth_token=request.cookies.get("auth_token"), auth_token=request.cookies.get("auth_token"),
@@ -780,4 +780,4 @@ async def publish_run(
if wants_html(request): if wants_html(request):
return HTMLResponse(f'<span class="text-green-400">Shared: {ipfs_cid[:16]}...</span>') return HTMLResponse(f'<span class="text-green-400">Shared: {ipfs_cid[:16]}...</span>')
return {"ipfs_cid": ipfs_cid, "output_hash": output_hash, "published": True} return {"ipfs_cid": ipfs_cid, "output_cid": output_cid, "published": True}

View File

@@ -91,26 +91,26 @@ class CacheService:
self.cache = cache_manager self.cache = cache_manager
self.cache_dir = Path(os.environ.get("CACHE_DIR", "/tmp/artdag-cache")) self.cache_dir = Path(os.environ.get("CACHE_DIR", "/tmp/artdag-cache"))
async def get_cache_item(self, content_hash: str) -> Optional[Dict[str, Any]]: async def get_cache_item(self, cid: str) -> Optional[Dict[str, Any]]:
"""Get cached item with full metadata for display.""" """Get cached item with full metadata for display."""
# Check if content exists # Check if content exists
if not self.cache.has_content(content_hash): if not self.cache.has_content(cid):
return None return None
path = self.cache.get_by_content_hash(content_hash) path = self.cache.get_by_cid(cid)
if not path or not path.exists(): if not path or not path.exists():
return None return None
# Get metadata from database # Get metadata from database
meta = await self.db.load_item_metadata(content_hash, None) meta = await self.db.load_item_metadata(cid, None)
cache_item = await self.db.get_cache_item(content_hash) cache_item = await self.db.get_cache_item(cid)
media_type = detect_media_type(path) media_type = detect_media_type(path)
mime_type = get_mime_type(path) mime_type = get_mime_type(path)
size = path.stat().st_size size = path.stat().st_size
return { return {
"content_hash": content_hash, "cid": cid,
"path": str(path), "path": str(path),
"media_type": media_type, "media_type": media_type,
"mime_type": mime_type, "mime_type": mime_type,
@@ -119,10 +119,10 @@ class CacheService:
"meta": meta, "meta": meta,
} }
async def check_access(self, content_hash: str, actor_id: str, username: str) -> bool: async def check_access(self, cid: str, actor_id: str, username: str) -> bool:
"""Check if user has access to content.""" """Check if user has access to content."""
user_hashes = await self._get_user_cache_hashes(username, actor_id) user_hashes = await self._get_user_cache_hashes(username, actor_id)
return content_hash in user_hashes return cid in user_hashes
async def _get_user_cache_hashes(self, username: str, actor_id: Optional[str] = None) -> set: async def _get_user_cache_hashes(self, username: str, actor_id: Optional[str] = None) -> set:
"""Get all cache hashes owned by or associated with a user.""" """Get all cache CIDs owned by or associated with a user."""
@@ -137,7 +137,7 @@ class CacheService:
try: try:
db_items = await self.db.get_user_items(actor_id) db_items = await self.db.get_user_items(actor_id)
for item in db_items: for item in db_items:
hashes.add(item["content_hash"]) hashes.add(item["cid"])
except Exception: except Exception:
pass pass
@@ -160,8 +160,8 @@ class CacheService:
if isinstance(inputs, dict): if isinstance(inputs, dict):
inputs = list(inputs.values()) inputs = list(inputs.values())
hashes.update(inputs) hashes.update(inputs)
if run.get("output_hash"): if run.get("output_cid"):
hashes.add(run["output_hash"]) hashes.add(run["output_cid"])
return hashes return hashes
@@ -188,12 +188,12 @@ class CacheService:
return runs return runs
async def get_raw_file(self, content_hash: str) -> Tuple[Optional[Path], Optional[str], Optional[str]]: async def get_raw_file(self, cid: str) -> Tuple[Optional[Path], Optional[str], Optional[str]]:
"""Get raw file path, media type, and filename for download.""" """Get raw file path, media type, and filename for download."""
if not self.cache.has_content(content_hash): if not self.cache.has_content(cid):
return None, None, None return None, None, None
path = self.cache.get_by_content_hash(content_hash) path = self.cache.get_by_cid(cid)
if not path or not path.exists(): if not path or not path.exists():
return None, None, None return None, None, None
@@ -223,17 +223,17 @@ class CacheService:
except Exception: except Exception:
ext = "jpg" ext = "jpg"
filename = f"{content_hash}.{ext}" filename = f"{cid}.{ext}"
return path, mime, filename return path, mime, filename
async def get_as_mp4(self, content_hash: str) -> Tuple[Optional[Path], Optional[str]]: async def get_as_mp4(self, cid: str) -> Tuple[Optional[Path], Optional[str]]:
"""Get content as MP4, transcoding if necessary. Returns (path, error).""" """Get content as MP4, transcoding if necessary. Returns (path, error)."""
if not self.cache.has_content(content_hash): if not self.cache.has_content(cid):
return None, f"Content {content_hash} not in cache" return None, f"Content {cid} not in cache"
path = self.cache.get_by_content_hash(content_hash) path = self.cache.get_by_cid(cid)
if not path or not path.exists(): if not path or not path.exists():
return None, f"Content {content_hash} not in cache" return None, f"Content {cid} not in cache"
# Check if video # Check if video
media_type = detect_media_type(path) media_type = detect_media_type(path)
@@ -241,7 +241,7 @@ class CacheService:
return None, "Content is not a video" return None, "Content is not a video"
# Check for cached MP4 # Check for cached MP4
mp4_path = self.cache_dir / f"{content_hash}.mp4" mp4_path = self.cache_dir / f"{cid}.mp4"
if mp4_path.exists(): if mp4_path.exists():
return mp4_path, None return mp4_path, None
@@ -258,7 +258,7 @@ class CacheService:
pass pass
# Transcode to MP4 # Transcode to MP4
transcode_path = self.cache_dir / f"{content_hash}.transcoding.mp4" transcode_path = self.cache_dir / f"{cid}.transcoding.mp4"
try: try:
result = subprocess.run( result = subprocess.run(
["ffmpeg", "-y", "-i", str(path), ["ffmpeg", "-y", "-i", str(path),
@@ -283,15 +283,15 @@ class CacheService:
transcode_path.unlink() transcode_path.unlink()
return None, f"Transcoding failed: {e}" return None, f"Transcoding failed: {e}"
async def get_metadata(self, content_hash: str, actor_id: str) -> Optional[Dict[str, Any]]: async def get_metadata(self, cid: str, actor_id: str) -> Optional[Dict[str, Any]]:
"""Get content metadata.""" """Get content metadata."""
if not self.cache.has_content(content_hash): if not self.cache.has_content(cid):
return None return None
return await self.db.load_item_metadata(content_hash, actor_id) return await self.db.load_item_metadata(cid, actor_id)
async def update_metadata( async def update_metadata(
self, self,
content_hash: str, cid: str,
actor_id: str, actor_id: str,
title: str = None, title: str = None,
description: str = None, description: str = None,
@@ -299,7 +299,7 @@ class CacheService:
custom: Dict[str, Any] = None, custom: Dict[str, Any] = None,
) -> Tuple[bool, Optional[str]]: ) -> Tuple[bool, Optional[str]]:
"""Update content metadata. Returns (success, error).""" """Update content metadata. Returns (success, error)."""
if not self.cache.has_content(content_hash): if not self.cache.has_content(cid):
return False, "Content not found" return False, "Content not found"
# Build update dict # Build update dict
@@ -314,28 +314,28 @@ class CacheService:
updates["custom"] = custom updates["custom"] = custom
try: try:
await self.db.update_item_metadata(content_hash, actor_id, **updates) await self.db.update_item_metadata(cid, actor_id, **updates)
return True, None return True, None
except Exception as e: except Exception as e:
return False, str(e) return False, str(e)
async def publish_to_l2( async def publish_to_l2(
self, self,
content_hash: str, cid: str,
actor_id: str, actor_id: str,
l2_server: str, l2_server: str,
auth_token: str, auth_token: str,
) -> Tuple[Optional[str], Optional[str]]: ) -> Tuple[Optional[str], Optional[str]]:
"""Publish content to L2 and IPFS. Returns (ipfs_cid, error).""" """Publish content to L2 and IPFS. Returns (ipfs_cid, error)."""
if not self.cache.has_content(content_hash): if not self.cache.has_content(cid):
return None, "Content not found" return None, "Content not found"
# Get IPFS CID # Get IPFS CID
cache_item = await self.db.get_cache_item(content_hash) cache_item = await self.db.get_cache_item(cid)
ipfs_cid = cache_item.get("ipfs_cid") if cache_item else None ipfs_cid = cache_item.get("ipfs_cid") if cache_item else None
# Get metadata for origin info # Get metadata for origin info
meta = await self.db.load_item_metadata(content_hash, actor_id) meta = await self.db.load_item_metadata(cid, actor_id)
origin = meta.get("origin") if meta else None origin = meta.get("origin") if meta else None
if not origin or "type" not in origin: if not origin or "type" not in origin:
@@ -351,10 +351,10 @@ class CacheService:
f"{l2_server}/assets/publish-cache", f"{l2_server}/assets/publish-cache",
headers={"Authorization": f"Bearer {auth_token}"}, headers={"Authorization": f"Bearer {auth_token}"},
json={ json={
"content_hash": content_hash, "cid": cid,
"ipfs_cid": ipfs_cid, "ipfs_cid": ipfs_cid,
"asset_name": meta.get("title") or content_hash[:16], "asset_name": meta.get("title") or cid[:16],
"asset_type": detect_media_type(self.cache.get_by_content_hash(content_hash)), "asset_type": detect_media_type(self.cache.get_by_cid(cid)),
"origin": origin, "origin": origin,
"description": meta.get("description"), "description": meta.get("description"),
"tags": meta.get("tags", []), "tags": meta.get("tags", []),
@@ -374,14 +374,14 @@ class CacheService:
# Update local metadata with publish status # Update local metadata with publish status
await self.db.save_l2_share( await self.db.save_l2_share(
content_hash=content_hash, cid=cid,
actor_id=actor_id, actor_id=actor_id,
l2_server=l2_server, l2_server=l2_server,
asset_name=meta.get("title") or content_hash[:16], asset_name=meta.get("title") or cid[:16],
content_type=detect_media_type(self.cache.get_by_content_hash(content_hash)) content_type=detect_media_type(self.cache.get_by_cid(cid))
) )
await self.db.update_item_metadata( await self.db.update_item_metadata(
content_hash=content_hash, cid=cid,
actor_id=actor_id, actor_id=actor_id,
pinned=True, pinned=True,
pin_reason="published" pin_reason="published"
@@ -389,37 +389,37 @@ class CacheService:
return l2_result.get("ipfs_cid") or ipfs_cid, None return l2_result.get("ipfs_cid") or ipfs_cid, None
async def delete_content(self, content_hash: str, actor_id: str) -> Tuple[bool, Optional[str]]: async def delete_content(self, cid: str, actor_id: str) -> Tuple[bool, Optional[str]]:
"""Delete content from cache. Returns (success, error).""" """Delete content from cache. Returns (success, error)."""
if not self.cache.has_content(content_hash): if not self.cache.has_content(cid):
return False, "Content not found" return False, "Content not found"
# Check if pinned # Check if pinned
meta = await self.db.load_item_metadata(content_hash, actor_id) meta = await self.db.load_item_metadata(cid, actor_id)
if meta and meta.get("pinned"): if meta and meta.get("pinned"):
pin_reason = meta.get("pin_reason", "unknown") pin_reason = meta.get("pin_reason", "unknown")
return False, f"Cannot discard pinned item (reason: {pin_reason})" return False, f"Cannot discard pinned item (reason: {pin_reason})"
# Check deletion rules via cache_manager # Check deletion rules via cache_manager
can_delete, reason = self.cache.can_delete(content_hash) can_delete, reason = self.cache.can_delete(cid)
if not can_delete: if not can_delete:
return False, f"Cannot discard: {reason}" return False, f"Cannot discard: {reason}"
# Delete via cache_manager # Delete via cache_manager
success, msg = self.cache.delete_by_content_hash(content_hash) success, msg = self.cache.delete_by_cid(cid)
# Clean up legacy metadata files # Clean up legacy metadata files
meta_path = self.cache_dir / f"{content_hash}.meta.json" meta_path = self.cache_dir / f"{cid}.meta.json"
if meta_path.exists(): if meta_path.exists():
meta_path.unlink() meta_path.unlink()
mp4_path = self.cache_dir / f"{content_hash}.mp4" mp4_path = self.cache_dir / f"{cid}.mp4"
if mp4_path.exists(): if mp4_path.exists():
mp4_path.unlink() mp4_path.unlink()
return True, None return True, None
async def import_from_ipfs(self, ipfs_cid: str, actor_id: str) -> Tuple[Optional[str], Optional[str]]: async def import_from_ipfs(self, ipfs_cid: str, actor_id: str) -> Tuple[Optional[str], Optional[str]]:
"""Import content from IPFS. Returns (content_hash, error).""" """Import content from IPFS. Returns (cid, error)."""
try: try:
import ipfs_client import ipfs_client
@@ -433,18 +433,18 @@ class CacheService:
# Store in cache # Store in cache
cached, _ = self.cache.put(tmp_path, node_type="import", move=True) cached, _ = self.cache.put(tmp_path, node_type="import", move=True)
content_hash = cached.content_hash cid = cached.cid
# Save to database # Save to database
await self.db.create_cache_item(content_hash, ipfs_cid) await self.db.create_cache_item(cid, ipfs_cid)
await self.db.save_item_metadata( await self.db.save_item_metadata(
content_hash=content_hash, cid=cid,
actor_id=actor_id, actor_id=actor_id,
item_type="media", item_type="media",
filename=f"ipfs-{ipfs_cid[:16]}" filename=f"ipfs-{ipfs_cid[:16]}"
) )
return content_hash, None return cid, None
except Exception as e: except Exception as e:
return None, f"Import failed: {e}" return None, f"Import failed: {e}"
@@ -454,7 +454,7 @@ class CacheService:
filename: str, filename: str,
actor_id: str, actor_id: str,
) -> Tuple[Optional[str], Optional[str], Optional[str]]: ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
"""Upload content to cache. Returns (content_hash, ipfs_cid, error).""" """Upload content to cache. Returns (cid, ipfs_cid, error)."""
import tempfile import tempfile
try: try:
@@ -468,18 +468,18 @@ class CacheService:
# Store in cache (also stores in IPFS) # Store in cache (also stores in IPFS)
cached, ipfs_cid = self.cache.put(tmp_path, node_type="upload", move=True) cached, ipfs_cid = self.cache.put(tmp_path, node_type="upload", move=True)
content_hash = cached.content_hash cid = cached.cid
# Save to database with detected MIME type # Save to database with detected MIME type
await self.db.create_cache_item(content_hash, ipfs_cid) await self.db.create_cache_item(cid, ipfs_cid)
await self.db.save_item_metadata( await self.db.save_item_metadata(
content_hash=content_hash, cid=cid,
actor_id=actor_id, actor_id=actor_id,
item_type=mime_type, # Store actual MIME type item_type=mime_type, # Store actual MIME type
filename=filename filename=filename
) )
return content_hash, ipfs_cid, None return cid, ipfs_cid, None
except Exception as e: except Exception as e:
return None, None, f"Upload failed: {e}" return None, None, f"Upload failed: {e}"
@@ -502,10 +502,10 @@ class CacheService:
return items return items
# Legacy compatibility methods # Legacy compatibility methods
def has_content(self, content_hash: str) -> bool: def has_content(self, cid: str) -> bool:
"""Check if content exists in cache.""" """Check if content exists in cache."""
return self.cache.has_content(content_hash) return self.cache.has_content(cid)
def get_ipfs_cid(self, content_hash: str) -> Optional[str]: def get_ipfs_cid(self, cid: str) -> Optional[str]:
"""Get IPFS CID for cached content.""" """Get IPFS CID for cached content."""
return self.cache.get_ipfs_cid(content_hash) return self.cache.get_ipfs_cid(cid)

View File

@@ -27,7 +27,7 @@ class RecipeService:
async def get_recipe(self, recipe_id: str) -> Optional[Dict[str, Any]]: async def get_recipe(self, recipe_id: str) -> Optional[Dict[str, Any]]:
"""Get a recipe by ID (content hash).""" """Get a recipe by ID (content hash)."""
# Get from cache (content-addressed storage) # Get from cache (content-addressed storage)
path = self.cache.get_by_content_hash(recipe_id) path = self.cache.get_by_cid(recipe_id)
if not path or not path.exists(): if not path or not path.exists():
return None return None
@@ -70,8 +70,8 @@ class RecipeService:
if hasattr(self.cache, 'list_by_type'): if hasattr(self.cache, 'list_by_type'):
items = self.cache.list_by_type('recipe') items = self.cache.list_by_type('recipe')
logger.info(f"Found {len(items)} recipes in cache") logger.info(f"Found {len(items)} recipes in cache")
for content_hash in items: for cid in items:
recipe = await self.get_recipe(content_hash) recipe = await self.get_recipe(cid)
if recipe and not recipe.get("error"): if recipe and not recipe.get("error"):
owner = recipe.get("owner") owner = recipe.get("owner")
# Filter by actor - L1 is per-user # Filter by actor - L1 is per-user
@@ -114,7 +114,7 @@ class RecipeService:
# Store in cache (content-addressed, auto-pins to IPFS) # Store in cache (content-addressed, auto-pins to IPFS)
cached, ipfs_cid = self.cache.put(tmp_path, node_type="recipe", move=True) cached, ipfs_cid = self.cache.put(tmp_path, node_type="recipe", move=True)
recipe_id = cached.content_hash recipe_id = cached.cid
return recipe_id, None return recipe_id, None
@@ -140,12 +140,12 @@ class RecipeService:
# Delete from cache # Delete from cache
try: try:
if hasattr(self.cache, 'delete_by_content_hash'): if hasattr(self.cache, 'delete_by_cid'):
success, msg = self.cache.delete_by_content_hash(recipe_id) success, msg = self.cache.delete_by_cid(recipe_id)
if not success: if not success:
return False, msg return False, msg
else: else:
path = self.cache.get_by_content_hash(recipe_id) path = self.cache.get_by_cid(recipe_id)
if path and path.exists(): if path and path.exists():
path.unlink() path.unlink()

View File

@@ -122,7 +122,7 @@ class RunService:
"status": "completed", "status": "completed",
"recipe": cached.get("recipe"), "recipe": cached.get("recipe"),
"inputs": self._ensure_inputs_list(cached.get("inputs")), "inputs": self._ensure_inputs_list(cached.get("inputs")),
"output_hash": cached.get("output_hash"), "output_cid": cached.get("output_cid"),
"ipfs_cid": cached.get("ipfs_cid"), "ipfs_cid": cached.get("ipfs_cid"),
"provenance_cid": cached.get("provenance_cid"), "provenance_cid": cached.get("provenance_cid"),
"actor_id": cached.get("actor_id"), "actor_id": cached.get("actor_id"),
@@ -171,7 +171,7 @@ class RunService:
run_data["status"] = "completed" run_data["status"] = "completed"
task_result = result.result task_result = result.result
if isinstance(task_result, dict): if isinstance(task_result, dict):
run_data["output_hash"] = task_result.get("output_hash") run_data["output_cid"] = task_result.get("output_cid")
else: else:
run_data["status"] = "failed" run_data["status"] = "failed"
run_data["error"] = str(result.result) run_data["error"] = str(result.result)
@@ -258,7 +258,7 @@ class RunService:
run_data["status"] = "completed" run_data["status"] = "completed"
task_result = result.result task_result = result.result
if isinstance(task_result, dict): if isinstance(task_result, dict):
run_data["output_hash"] = task_result.get("output_hash") run_data["output_cid"] = task_result.get("output_cid")
else: else:
run_data["status"] = "failed" run_data["status"] = "failed"
run_data["error"] = str(result.result) run_data["error"] = str(result.result)
@@ -332,15 +332,15 @@ class RunService:
# Check database cache first (completed runs) # Check database cache first (completed runs)
cached_run = await self.db.get_run_cache(run_id) cached_run = await self.db.get_run_cache(run_id)
if cached_run: if cached_run:
output_hash = cached_run.get("output_hash") output_cid = cached_run.get("output_cid")
if output_hash and self.cache.has_content(output_hash): if output_cid and self.cache.has_content(output_cid):
return { return {
"run_id": run_id, "run_id": run_id,
"status": "completed", "status": "completed",
"recipe": recipe, "recipe": recipe,
"inputs": input_list, "inputs": input_list,
"output_name": output_name, "output_name": output_name,
"output_hash": output_hash, "output_cid": output_cid,
"ipfs_cid": cached_run.get("ipfs_cid"), "ipfs_cid": cached_run.get("ipfs_cid"),
"provenance_cid": cached_run.get("provenance_cid"), "provenance_cid": cached_run.get("provenance_cid"),
"created_at": cached_run.get("created_at"), "created_at": cached_run.get("created_at"),
@@ -355,20 +355,20 @@ class RunService:
l2_resp = await client.get(f"{l2_server}/assets/by-run-id/{run_id}") l2_resp = await client.get(f"{l2_server}/assets/by-run-id/{run_id}")
if l2_resp.status_code == 200: if l2_resp.status_code == 200:
l2_data = l2_resp.json() l2_data = l2_resp.json()
output_hash = l2_data.get("output_hash") output_cid = l2_data.get("output_cid")
ipfs_cid = l2_data.get("ipfs_cid") ipfs_cid = l2_data.get("ipfs_cid")
if output_hash and ipfs_cid: if output_cid and ipfs_cid:
# Pull from IPFS to local cache # Pull from IPFS to local cache
try: try:
import ipfs_client import ipfs_client
legacy_dir = self.cache_dir / "legacy" legacy_dir = self.cache_dir / "legacy"
legacy_dir.mkdir(parents=True, exist_ok=True) legacy_dir.mkdir(parents=True, exist_ok=True)
recovery_path = legacy_dir / output_hash recovery_path = legacy_dir / output_cid
if ipfs_client.get_file(ipfs_cid, str(recovery_path)): if ipfs_client.get_file(ipfs_cid, str(recovery_path)):
# Save to database cache # Save to database cache
await self.db.save_run_cache( await self.db.save_run_cache(
run_id=run_id, run_id=run_id,
output_hash=output_hash, output_cid=output_cid,
recipe=recipe, recipe=recipe,
inputs=input_list, inputs=input_list,
ipfs_cid=ipfs_cid, ipfs_cid=ipfs_cid,
@@ -380,7 +380,7 @@ class RunService:
"status": "completed", "status": "completed",
"recipe": recipe, "recipe": recipe,
"inputs": input_list, "inputs": input_list,
"output_hash": output_hash, "output_cid": output_cid,
"ipfs_cid": ipfs_cid, "ipfs_cid": ipfs_cid,
"provenance_cid": l2_data.get("provenance_cid"), "provenance_cid": l2_data.get("provenance_cid"),
"created_at": datetime.now(timezone.utc).isoformat(), "created_at": datetime.now(timezone.utc).isoformat(),
@@ -493,7 +493,7 @@ class RunService:
plan_cache_id = run.get("plan_cache_id") plan_cache_id = run.get("plan_cache_id")
if plan_cache_id: if plan_cache_id:
# Get plan from cache by content hash # Get plan from cache by content hash
plan_path = self.cache.get_by_content_hash(plan_cache_id) plan_path = self.cache.get_by_cid(plan_cache_id)
if plan_path and plan_path.exists(): if plan_path and plan_path.exists():
with open(plan_path) as f: with open(plan_path) as f:
content = f.read() content = f.read()
@@ -535,12 +535,12 @@ class RunService:
artifacts = [] artifacts = []
def get_artifact_info(content_hash: str, role: str, name: str) -> Optional[Dict]: def get_artifact_info(cid: str, role: str, name: str) -> Optional[Dict]:
if self.cache.has_content(content_hash): if self.cache.has_content(cid):
path = self.cache.get_by_content_hash(content_hash) path = self.cache.get_by_cid(cid)
if path and path.exists(): if path and path.exists():
return { return {
"hash": content_hash, "hash": cid,
"size_bytes": path.stat().st_size, "size_bytes": path.stat().st_size,
"media_type": detect_media_type(path), "media_type": detect_media_type(path),
"role": role, "role": role,
@@ -558,8 +558,8 @@ class RunService:
artifacts.append(info) artifacts.append(info)
# Add output # Add output
if run.get("output_hash"): if run.get("output_cid"):
info = get_artifact_info(run["output_hash"], "output", "Output") info = get_artifact_info(run["output_cid"], "output", "Output")
if info: if info:
artifacts.append(info) artifacts.append(info)
@@ -669,10 +669,10 @@ class RunService:
if result.successful(): if result.successful():
# Task completed - move to run_cache # Task completed - move to run_cache
task_result = result.result task_result = result.result
if isinstance(task_result, dict) and task_result.get("output_hash"): if isinstance(task_result, dict) and task_result.get("output_cid"):
await self.db.save_run_cache( await self.db.save_run_cache(
run_id=run_id, run_id=run_id,
output_hash=task_result["output_hash"], output_cid=task_result["output_cid"],
recipe=run.get("recipe", "unknown"), recipe=run.get("recipe", "unknown"),
inputs=run.get("inputs", []), inputs=run.get("inputs", []),
ipfs_cid=task_result.get("ipfs_cid"), ipfs_cid=task_result.get("ipfs_cid"),

View File

@@ -1,29 +1,29 @@
{% extends "base.html" %} {% extends "base.html" %}
{% block title %}{{ cache.content_hash[:16] }} - Cache - Art-DAG L1{% endblock %} {% block title %}{{ cache.cid[:16] }} - Cache - Art-DAG L1{% endblock %}
{% block content %} {% block content %}
<div class="max-w-4xl mx-auto"> <div class="max-w-4xl mx-auto">
<!-- Header --> <!-- Header -->
<div class="flex items-center space-x-4 mb-6"> <div class="flex items-center space-x-4 mb-6">
<a href="/media" class="text-gray-400 hover:text-white">&larr; Media</a> <a href="/media" class="text-gray-400 hover:text-white">&larr; Media</a>
<h1 class="text-xl font-bold font-mono">{{ cache.content_hash[:24] }}...</h1> <h1 class="text-xl font-bold font-mono">{{ cache.cid[:24] }}...</h1>
</div> </div>
<!-- Preview --> <!-- Preview -->
<div class="bg-gray-800 rounded-lg border border-gray-700 mb-6 overflow-hidden"> <div class="bg-gray-800 rounded-lg border border-gray-700 mb-6 overflow-hidden">
{% if cache.mime_type and cache.mime_type.startswith('image/') %} {% if cache.mime_type and cache.mime_type.startswith('image/') %}
<img src="/cache/{{ cache.content_hash }}/raw" alt="" <img src="/cache/{{ cache.cid }}/raw" alt=""
class="w-full max-h-96 object-contain bg-gray-900"> class="w-full max-h-96 object-contain bg-gray-900">
{% elif cache.mime_type and cache.mime_type.startswith('video/') %} {% elif cache.mime_type and cache.mime_type.startswith('video/') %}
<video src="/cache/{{ cache.content_hash }}/raw" controls <video src="/cache/{{ cache.cid }}/raw" controls
class="w-full max-h-96 bg-gray-900"> class="w-full max-h-96 bg-gray-900">
</video> </video>
{% elif cache.mime_type and cache.mime_type.startswith('audio/') %} {% elif cache.mime_type and cache.mime_type.startswith('audio/') %}
<div class="p-8 bg-gray-900"> <div class="p-8 bg-gray-900">
<audio src="/cache/{{ cache.content_hash }}/raw" controls class="w-full"></audio> <audio src="/cache/{{ cache.cid }}/raw" controls class="w-full"></audio>
</div> </div>
{% elif cache.mime_type == 'application/json' %} {% elif cache.mime_type == 'application/json' %}
@@ -42,8 +42,8 @@
<!-- Metadata --> <!-- Metadata -->
<div class="grid grid-cols-2 gap-4 mb-6"> <div class="grid grid-cols-2 gap-4 mb-6">
<div class="bg-gray-800 rounded-lg p-4"> <div class="bg-gray-800 rounded-lg p-4">
<div class="text-gray-500 text-sm">Hash</div> <div class="text-gray-500 text-sm">CID</div>
<div class="font-mono text-sm text-white break-all">{{ cache.content_hash }}</div> <div class="font-mono text-sm text-white break-all">{{ cache.cid }}</div>
</div> </div>
<div class="bg-gray-800 rounded-lg p-4"> <div class="bg-gray-800 rounded-lg p-4">
<div class="text-gray-500 text-sm">Content Type</div> <div class="text-gray-500 text-sm">Content Type</div>
@@ -92,12 +92,12 @@
<!-- Actions --> <!-- Actions -->
<div class="flex items-center space-x-4 mt-8"> <div class="flex items-center space-x-4 mt-8">
<a href="/cache/{{ cache.content_hash }}/raw" <a href="/cache/{{ cache.cid }}/raw"
download download
class="bg-blue-600 hover:bg-blue-700 px-4 py-2 rounded font-medium"> class="bg-blue-600 hover:bg-blue-700 px-4 py-2 rounded font-medium">
Download Download
</a> </a>
<button hx-post="/cache/{{ cache.content_hash }}/publish" <button hx-post="/cache/{{ cache.cid }}/publish"
hx-target="#share-result" hx-target="#share-result"
class="bg-purple-600 hover:bg-purple-700 px-4 py-2 rounded font-medium"> class="bg-purple-600 hover:bg-purple-700 px-4 py-2 rounded font-medium">
Share to L2 Share to L2

View File

@@ -25,19 +25,19 @@
{% set is_video = item.type in ('video', 'video/mp4', 'video/webm', 'video/x-matroska') or (item.filename and item.filename.lower().endswith(('.mp4', '.mkv', '.webm', '.mov'))) %} {% set is_video = item.type in ('video', 'video/mp4', 'video/webm', 'video/x-matroska') or (item.filename and item.filename.lower().endswith(('.mp4', '.mkv', '.webm', '.mov'))) %}
{% set is_audio = item.type in ('audio', 'audio/mpeg', 'audio/wav', 'audio/flac') or (item.filename and item.filename.lower().endswith(('.mp3', '.wav', '.flac', '.ogg'))) %} {% set is_audio = item.type in ('audio', 'audio/mpeg', 'audio/wav', 'audio/flac') or (item.filename and item.filename.lower().endswith(('.mp3', '.wav', '.flac', '.ogg'))) %}
<a href="/cache/{{ item.content_hash }}" <a href="/cache/{{ item.cid }}"
class="media-item bg-gray-800 rounded-lg overflow-hidden hover:ring-2 hover:ring-blue-500 transition-all" class="media-item bg-gray-800 rounded-lg overflow-hidden hover:ring-2 hover:ring-blue-500 transition-all"
data-type="{% if is_image %}image{% elif is_video %}video{% elif is_audio %}audio{% else %}other{% endif %}"> data-type="{% if is_image %}image{% elif is_video %}video{% elif is_audio %}audio{% else %}other{% endif %}">
{% if is_image %} {% if is_image %}
<img src="/cache/{{ item.content_hash }}/raw" <img src="/cache/{{ item.cid }}/raw"
alt="" alt=""
loading="lazy" loading="lazy"
class="w-full h-40 object-cover"> class="w-full h-40 object-cover">
{% elif is_video %} {% elif is_video %}
<div class="relative"> <div class="relative">
<video src="/cache/{{ item.content_hash }}/raw" <video src="/cache/{{ item.cid }}/raw"
class="w-full h-40 object-cover" class="w-full h-40 object-cover"
muted muted
onmouseover="this.play()" onmouseover="this.play()"
@@ -68,7 +68,7 @@
{% endif %} {% endif %}
<div class="p-3"> <div class="p-3">
<div class="font-mono text-xs text-gray-500 truncate">{{ item.content_hash[:16] }}...</div> <div class="font-mono text-xs text-gray-500 truncate">{{ item.cid[:16] }}...</div>
{% if item.filename %} {% if item.filename %}
<div class="text-xs text-gray-600 truncate">{{ item.filename }}</div> <div class="text-xs text-gray-600 truncate">{{ item.filename }}</div>
{% endif %} {% endif %}

View File

@@ -44,9 +44,9 @@
<span class="text-xs text-gray-500 mr-1">In:</span> <span class="text-xs text-gray-500 mr-1">In:</span>
{% for inp in run.input_previews %} {% for inp in run.input_previews %}
{% if inp.media_type and inp.media_type.startswith('image/') %} {% if inp.media_type and inp.media_type.startswith('image/') %}
<img src="/cache/{{ inp.hash }}/raw" alt="" class="w-10 h-10 object-cover rounded"> <img src="/cache/{{ inp.cid }}/raw" alt="" class="w-10 h-10 object-cover rounded">
{% elif inp.media_type and inp.media_type.startswith('video/') %} {% elif inp.media_type and inp.media_type.startswith('video/') %}
<video src="/cache/{{ inp.hash }}/raw" class="w-10 h-10 object-cover rounded" muted></video> <video src="/cache/{{ inp.cid }}/raw" class="w-10 h-10 object-cover rounded" muted></video>
{% else %} {% else %}
<div class="w-10 h-10 bg-gray-700 rounded flex items-center justify-center text-gray-500 text-xs">?</div> <div class="w-10 h-10 bg-gray-700 rounded flex items-center justify-center text-gray-500 text-xs">?</div>
{% endif %} {% endif %}
@@ -65,13 +65,13 @@
<span class="text-gray-600">-></span> <span class="text-gray-600">-></span>
{# Output preview #} {# Output preview #}
{% if run.output_hash %} {% if run.output_cid %}
<div class="flex items-center space-x-1"> <div class="flex items-center space-x-1">
<span class="text-xs text-gray-500 mr-1">Out:</span> <span class="text-xs text-gray-500 mr-1">Out:</span>
{% if run.output_media_type and run.output_media_type.startswith('image/') %} {% if run.output_media_type and run.output_media_type.startswith('image/') %}
<img src="/cache/{{ run.output_hash }}/raw" alt="" class="w-10 h-10 object-cover rounded"> <img src="/cache/{{ run.output_cid }}/raw" alt="" class="w-10 h-10 object-cover rounded">
{% elif run.output_media_type and run.output_media_type.startswith('video/') %} {% elif run.output_media_type and run.output_media_type.startswith('video/') %}
<video src="/cache/{{ run.output_hash }}/raw" class="w-10 h-10 object-cover rounded" muted></video> <video src="/cache/{{ run.output_cid }}/raw" class="w-10 h-10 object-cover rounded" muted></video>
{% else %} {% else %}
<div class="w-10 h-10 bg-gray-700 rounded flex items-center justify-center text-gray-500 text-xs">?</div> <div class="w-10 h-10 bg-gray-700 rounded flex items-center justify-center text-gray-500 text-xs">?</div>
{% endif %} {% endif %}
@@ -82,8 +82,8 @@
<div class="flex-grow"></div> <div class="flex-grow"></div>
{% if run.output_hash %} {% if run.output_cid %}
<span class="font-mono text-xs text-gray-600">{{ run.output_hash[:12] }}...</span> <span class="font-mono text-xs text-gray-600">{{ run.output_cid[:12] }}...</span>
{% endif %} {% endif %}
</div> </div>
</a> </a>

View File

@@ -211,20 +211,20 @@
{% if artifacts %} {% if artifacts %}
<div class="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-4 gap-4"> <div class="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-4 gap-4">
{% for artifact in artifacts %} {% for artifact in artifacts %}
<a href="/cache/{{ artifact.hash }}" <a href="/cache/{{ artifact.cid }}"
class="bg-gray-800 rounded-lg p-4 hover:bg-gray-750 transition-colors"> class="bg-gray-800 rounded-lg p-4 hover:bg-gray-750 transition-colors">
{% if artifact.media_type and artifact.media_type.startswith('image/') %} {% if artifact.media_type and artifact.media_type.startswith('image/') %}
<img src="/cache/{{ artifact.hash }}/raw" alt="" <img src="/cache/{{ artifact.cid }}/raw" alt=""
class="w-full h-32 object-cover rounded mb-2"> class="w-full h-32 object-cover rounded mb-2">
{% elif artifact.media_type and artifact.media_type.startswith('video/') %} {% elif artifact.media_type and artifact.media_type.startswith('video/') %}
<video src="/cache/{{ artifact.hash }}/raw" <video src="/cache/{{ artifact.cid }}/raw"
class="w-full h-32 object-cover rounded mb-2" muted></video> class="w-full h-32 object-cover rounded mb-2" muted></video>
{% else %} {% else %}
<div class="w-full h-32 bg-gray-900 rounded mb-2 flex items-center justify-center text-gray-600"> <div class="w-full h-32 bg-gray-900 rounded mb-2 flex items-center justify-center text-gray-600">
{{ artifact.media_type or 'Unknown' }} {{ artifact.media_type or 'Unknown' }}
</div> </div>
{% endif %} {% endif %}
<div class="font-mono text-xs text-gray-500 truncate">{{ artifact.hash[:16] }}...</div> <div class="font-mono text-xs text-gray-500 truncate">{{ artifact.cid[:16] }}...</div>
<div class="text-sm text-gray-400">{{ artifact.step_name }}</div> <div class="text-sm text-gray-400">{{ artifact.step_name }}</div>
</a> </a>
{% endfor %} {% endfor %}
@@ -242,8 +242,8 @@
<div class="bg-gray-800 rounded-lg p-6"> <div class="bg-gray-800 rounded-lg p-6">
<div class="flex items-center justify-between mb-4"> <div class="flex items-center justify-between mb-4">
<h3 class="text-lg font-semibold">{{ item.input_name }}</h3> <h3 class="text-lg font-semibold">{{ item.input_name }}</h3>
<a href="/cache/{{ item.input_hash }}" class="font-mono text-xs text-blue-400 hover:text-blue-300"> <a href="/cache/{{ item.input_cid }}" class="font-mono text-xs text-blue-400 hover:text-blue-300">
{{ item.input_hash[:16] }}... {{ item.input_cid[:16] }}...
</a> </a>
</div> </div>
@@ -330,21 +330,21 @@
<div class="bg-gray-800 rounded-lg overflow-hidden"> <div class="bg-gray-800 rounded-lg overflow-hidden">
<!-- Media Preview --> <!-- Media Preview -->
{% if input.media_type and input.media_type.startswith('image/') %} {% if input.media_type and input.media_type.startswith('image/') %}
<a href="/cache/{{ input.hash }}" class="block"> <a href="/cache/{{ input.cid }}" class="block">
<img src="/cache/{{ input.hash }}/raw" alt="{{ input.name or 'Input' }}" <img src="/cache/{{ input.cid }}/raw" alt="{{ input.name or 'Input' }}"
class="w-full h-48 object-cover"> class="w-full h-48 object-cover">
</a> </a>
{% elif input.media_type and input.media_type.startswith('video/') %} {% elif input.media_type and input.media_type.startswith('video/') %}
<a href="/cache/{{ input.hash }}" class="block"> <a href="/cache/{{ input.cid }}" class="block">
<video src="/cache/{{ input.hash }}/raw" <video src="/cache/{{ input.cid }}/raw"
class="w-full h-48 object-cover" muted controls></video> class="w-full h-48 object-cover" muted controls></video>
</a> </a>
{% elif input.media_type and input.media_type.startswith('audio/') %} {% elif input.media_type and input.media_type.startswith('audio/') %}
<div class="p-4 bg-gray-900"> <div class="p-4 bg-gray-900">
<audio src="/cache/{{ input.hash }}/raw" controls class="w-full"></audio> <audio src="/cache/{{ input.cid }}/raw" controls class="w-full"></audio>
</div> </div>
{% else %} {% else %}
<a href="/cache/{{ input.hash }}" class="block"> <a href="/cache/{{ input.cid }}" class="block">
<div class="w-full h-48 bg-gray-900 flex items-center justify-center text-gray-600"> <div class="w-full h-48 bg-gray-900 flex items-center justify-center text-gray-600">
<div class="text-center"> <div class="text-center">
<div class="text-4xl mb-2">📄</div> <div class="text-4xl mb-2">📄</div>
@@ -358,8 +358,8 @@
{% if input.name %} {% if input.name %}
<div class="font-medium text-white mb-1">{{ input.name }}</div> <div class="font-medium text-white mb-1">{{ input.name }}</div>
{% endif %} {% endif %}
<a href="/cache/{{ input.hash }}" class="font-mono text-xs text-blue-400 hover:text-blue-300 block truncate"> <a href="/cache/{{ input.cid }}" class="font-mono text-xs text-blue-400 hover:text-blue-300 block truncate">
{{ input.hash }} {{ input.cid }}
</a> </a>
{% if input.media_type %} {% if input.media_type %}
<div class="text-xs text-gray-500 mt-1">{{ input.media_type }}</div> <div class="text-xs text-gray-500 mt-1">{{ input.media_type }}</div>
@@ -384,22 +384,22 @@
</div> </div>
<!-- Output --> <!-- Output -->
{% if run.output_hash %} {% if run.output_cid %}
<div class="mt-8 bg-gray-800 rounded-lg p-6"> <div class="mt-8 bg-gray-800 rounded-lg p-6">
<h3 class="text-lg font-semibold mb-4">Output</h3> <h3 class="text-lg font-semibold mb-4">Output</h3>
{# Inline media preview #} {# Inline media preview #}
<div class="mb-4"> <div class="mb-4">
{% if output_media_type and output_media_type.startswith('image/') %} {% if output_media_type and output_media_type.startswith('image/') %}
<a href="/cache/{{ run.output_hash }}" class="block"> <a href="/cache/{{ run.output_cid }}" class="block">
<img src="/cache/{{ run.output_hash }}/raw" alt="Output" <img src="/cache/{{ run.output_cid }}/raw" alt="Output"
class="max-w-full max-h-96 rounded-lg mx-auto"> class="max-w-full max-h-96 rounded-lg mx-auto">
</a> </a>
{% elif output_media_type and output_media_type.startswith('video/') %} {% elif output_media_type and output_media_type.startswith('video/') %}
<video src="/cache/{{ run.output_hash }}/raw" controls <video src="/cache/{{ run.output_cid }}/raw" controls
class="max-w-full max-h-96 rounded-lg mx-auto"></video> class="max-w-full max-h-96 rounded-lg mx-auto"></video>
{% elif output_media_type and output_media_type.startswith('audio/') %} {% elif output_media_type and output_media_type.startswith('audio/') %}
<audio src="/cache/{{ run.output_hash }}/raw" controls class="w-full"></audio> <audio src="/cache/{{ run.output_cid }}/raw" controls class="w-full"></audio>
{% else %} {% else %}
<div class="bg-gray-900 rounded-lg p-8 text-center text-gray-500"> <div class="bg-gray-900 rounded-lg p-8 text-center text-gray-500">
<div class="text-4xl mb-2">?</div> <div class="text-4xl mb-2">?</div>
@@ -409,8 +409,8 @@
</div> </div>
<div class="flex items-center justify-between"> <div class="flex items-center justify-between">
<a href="/cache/{{ run.output_hash }}" class="font-mono text-sm text-blue-400 hover:text-blue-300"> <a href="/cache/{{ run.output_cid }}" class="font-mono text-sm text-blue-400 hover:text-blue-300">
{{ run.output_hash }} {{ run.output_cid }}
</a> </a>
{% if run.output_ipfs_cid %} {% if run.output_ipfs_cid %}
<a href="https://ipfs.io/ipfs/{{ run.output_ipfs_cid }}" <a href="https://ipfs.io/ipfs/{{ run.output_ipfs_cid }}"

View File

@@ -3,7 +3,7 @@
Cache management for Art DAG L1 server. Cache management for Art DAG L1 server.
Integrates artdag's Cache, ActivityStore, and ActivityManager to provide: Integrates artdag's Cache, ActivityStore, and ActivityManager to provide:
- Content-addressed caching with both node_id and content_hash - Content-addressed caching with both node_id and cid
- Activity tracking for runs (input/output/intermediate relationships) - Activity tracking for runs (input/output/intermediate relationships)
- Deletion rules enforcement (shared items protected) - Deletion rules enforcement (shared items protected)
- L2 ActivityPub integration for "shared" status checks - L2 ActivityPub integration for "shared" status checks
@@ -35,7 +35,7 @@ logger = logging.getLogger(__name__)
def file_hash(path: Path, algorithm: str = "sha3_256") -> str: def file_hash(path: Path, algorithm: str = "sha3_256") -> str:
"""Compute SHA3-256 hash of a file.""" """Compute local content hash (fallback when IPFS unavailable)."""
hasher = hashlib.new(algorithm) hasher = hashlib.new(algorithm)
actual_path = path.resolve() if path.is_symlink() else path actual_path = path.resolve() if path.is_symlink() else path
with open(actual_path, "rb") as f: with open(actual_path, "rb") as f:
@@ -51,10 +51,10 @@ class CachedFile:
Provides a unified view combining: Provides a unified view combining:
- node_id: computation identity (for DAG caching) - node_id: computation identity (for DAG caching)
- content_hash: file content identity (for external references) - cid: file content identity (for external references)
""" """
node_id: str node_id: str
content_hash: str cid: str
path: Path path: Path
size_bytes: int size_bytes: int
node_type: str node_type: str
@@ -64,7 +64,7 @@ class CachedFile:
def from_cache_entry(cls, entry: CacheEntry) -> "CachedFile": def from_cache_entry(cls, entry: CacheEntry) -> "CachedFile":
return cls( return cls(
node_id=entry.node_id, node_id=entry.node_id,
content_hash=entry.content_hash, cid=entry.cid,
path=entry.output_path, path=entry.output_path,
size_bytes=entry.size_bytes, size_bytes=entry.size_bytes,
node_type=entry.node_type, node_type=entry.node_type,
@@ -84,41 +84,41 @@ class L2SharedChecker:
self.cache_ttl = cache_ttl self.cache_ttl = cache_ttl
self._cache: Dict[str, tuple[bool, float]] = {} self._cache: Dict[str, tuple[bool, float]] = {}
def is_shared(self, content_hash: str) -> bool: def is_shared(self, cid: str) -> bool:
"""Check if content_hash has been published to L2.""" """Check if cid has been published to L2."""
import time import time
now = time.time() now = time.time()
# Check cache # Check cache
if content_hash in self._cache: if cid in self._cache:
is_shared, cached_at = self._cache[content_hash] is_shared, cached_at = self._cache[cid]
if now - cached_at < self.cache_ttl: if now - cached_at < self.cache_ttl:
logger.debug(f"L2 check (cached): {content_hash[:16]}... = {is_shared}") logger.debug(f"L2 check (cached): {cid[:16]}... = {is_shared}")
return is_shared return is_shared
# Query L2 # Query L2
try: try:
url = f"{self.l2_server}/assets/by-hash/{content_hash}" url = f"{self.l2_server}/assets/by-hash/{cid}"
logger.info(f"L2 check: GET {url}") logger.info(f"L2 check: GET {url}")
resp = requests.get(url, timeout=5) resp = requests.get(url, timeout=5)
logger.info(f"L2 check response: {resp.status_code}") logger.info(f"L2 check response: {resp.status_code}")
is_shared = resp.status_code == 200 is_shared = resp.status_code == 200
except Exception as e: except Exception as e:
logger.warning(f"Failed to check L2 for {content_hash}: {e}") logger.warning(f"Failed to check L2 for {cid}: {e}")
# On error, assume IS shared (safer - prevents accidental deletion) # On error, assume IS shared (safer - prevents accidental deletion)
is_shared = True is_shared = True
self._cache[content_hash] = (is_shared, now) self._cache[cid] = (is_shared, now)
return is_shared return is_shared
def invalidate(self, content_hash: str): def invalidate(self, cid: str):
"""Invalidate cache for a content_hash (call after publishing).""" """Invalidate cache for a cid (call after publishing)."""
self._cache.pop(content_hash, None) self._cache.pop(cid, None)
def mark_shared(self, content_hash: str): def mark_shared(self, cid: str):
"""Mark as shared without querying (call after successful publish).""" """Mark as shared without querying (call after successful publish)."""
import time import time
self._cache[content_hash] = (True, time.time()) self._cache[cid] = (True, time.time())
class L1CacheManager: class L1CacheManager:
@@ -131,7 +131,7 @@ class L1CacheManager:
- ActivityManager for deletion rules - ActivityManager for deletion rules
- L2 integration for shared status - L2 integration for shared status
Provides both node_id and content_hash based access. Provides both node_id and cid based access.
""" """
def __init__( def __init__(
@@ -162,16 +162,16 @@ class L1CacheManager:
is_shared_fn=self._is_shared_by_node_id, is_shared_fn=self._is_shared_by_node_id,
) )
# Content hash index: content_hash -> node_id # Content hash index: cid -> node_id
# Uses Redis if available, falls back to in-memory dict # Uses Redis if available, falls back to in-memory dict
self._content_index: Dict[str, str] = {} self._content_index: Dict[str, str] = {}
self._load_content_index() self._load_content_index()
# IPFS CID index: content_hash -> ipfs_cid # IPFS CID index: cid -> ipfs_cid
self._ipfs_cids: Dict[str, str] = {} self._ipfs_cids: Dict[str, str] = {}
self._load_ipfs_index() self._load_ipfs_index()
# Legacy files directory (for files uploaded directly by content_hash) # Legacy files directory (for files uploaded directly by cid)
self.legacy_dir = self.cache_dir / "legacy" self.legacy_dir = self.cache_dir / "legacy"
self.legacy_dir.mkdir(parents=True, exist_ok=True) self.legacy_dir.mkdir(parents=True, exist_ok=True)
@@ -179,7 +179,7 @@ class L1CacheManager:
return self.cache_dir / "content_index.json" return self.cache_dir / "content_index.json"
def _load_content_index(self): def _load_content_index(self):
"""Load content_hash -> node_id index from Redis or JSON file.""" """Load cid -> node_id index from Redis or JSON file."""
# If Redis available and has data, use it # If Redis available and has data, use it
if self._redis: if self._redis:
try: try:
@@ -206,8 +206,8 @@ class L1CacheManager:
# Also index from existing cache entries # Also index from existing cache entries
for entry in self.cache.list_entries(): for entry in self.cache.list_entries():
if entry.content_hash: if entry.cid:
self._content_index[entry.content_hash] = entry.node_id self._content_index[entry.cid] = entry.node_id
# Migrate to Redis if available # Migrate to Redis if available
if self._redis and self._content_index: if self._redis and self._content_index:
@@ -218,39 +218,39 @@ class L1CacheManager:
logger.warning(f"Failed to migrate content index to Redis: {e}") logger.warning(f"Failed to migrate content index to Redis: {e}")
def _save_content_index(self): def _save_content_index(self):
"""Save content_hash -> node_id index to Redis and JSON file.""" """Save cid -> node_id index to Redis and JSON file."""
# Always save to JSON as backup # Always save to JSON as backup
with open(self._index_path(), "w") as f: with open(self._index_path(), "w") as f:
json.dump(self._content_index, f, indent=2) json.dump(self._content_index, f, indent=2)
def _set_content_index(self, content_hash: str, node_id: str): def _set_content_index(self, cid: str, node_id: str):
"""Set a single content index entry (Redis + in-memory).""" """Set a single content index entry (Redis + in-memory)."""
self._content_index[content_hash] = node_id self._content_index[cid] = node_id
if self._redis: if self._redis:
try: try:
self._redis.hset(self._redis_content_key, content_hash, node_id) self._redis.hset(self._redis_content_key, cid, node_id)
except Exception as e: except Exception as e:
logger.warning(f"Failed to set content index in Redis: {e}") logger.warning(f"Failed to set content index in Redis: {e}")
self._save_content_index() self._save_content_index()
def _get_content_index(self, content_hash: str) -> Optional[str]: def _get_content_index(self, cid: str) -> Optional[str]:
"""Get a content index entry (Redis-first, then in-memory).""" """Get a content index entry (Redis-first, then in-memory)."""
if self._redis: if self._redis:
try: try:
val = self._redis.hget(self._redis_content_key, content_hash) val = self._redis.hget(self._redis_content_key, cid)
if val: if val:
return val.decode() if isinstance(val, bytes) else val return val.decode() if isinstance(val, bytes) else val
except Exception as e: except Exception as e:
logger.warning(f"Failed to get content index from Redis: {e}") logger.warning(f"Failed to get content index from Redis: {e}")
return self._content_index.get(content_hash) return self._content_index.get(cid)
def _del_content_index(self, content_hash: str): def _del_content_index(self, cid: str):
"""Delete a content index entry.""" """Delete a content index entry."""
if content_hash in self._content_index: if cid in self._content_index:
del self._content_index[content_hash] del self._content_index[cid]
if self._redis: if self._redis:
try: try:
self._redis.hdel(self._redis_content_key, content_hash) self._redis.hdel(self._redis_content_key, cid)
except Exception as e: except Exception as e:
logger.warning(f"Failed to delete content index from Redis: {e}") logger.warning(f"Failed to delete content index from Redis: {e}")
self._save_content_index() self._save_content_index()
@@ -259,7 +259,7 @@ class L1CacheManager:
return self.cache_dir / "ipfs_index.json" return self.cache_dir / "ipfs_index.json"
def _load_ipfs_index(self): def _load_ipfs_index(self):
"""Load content_hash -> ipfs_cid index from Redis or JSON file.""" """Load cid -> ipfs_cid index from Redis or JSON file."""
# If Redis available and has data, use it # If Redis available and has data, use it
if self._redis: if self._redis:
try: try:
@@ -293,71 +293,71 @@ class L1CacheManager:
logger.warning(f"Failed to migrate IPFS index to Redis: {e}") logger.warning(f"Failed to migrate IPFS index to Redis: {e}")
def _save_ipfs_index(self): def _save_ipfs_index(self):
"""Save content_hash -> ipfs_cid index to JSON file (backup).""" """Save cid -> ipfs_cid index to JSON file (backup)."""
with open(self._ipfs_index_path(), "w") as f: with open(self._ipfs_index_path(), "w") as f:
json.dump(self._ipfs_cids, f, indent=2) json.dump(self._ipfs_cids, f, indent=2)
def _set_ipfs_index(self, content_hash: str, ipfs_cid: str): def _set_ipfs_index(self, cid: str, ipfs_cid: str):
"""Set a single IPFS index entry (Redis + in-memory).""" """Set a single IPFS index entry (Redis + in-memory)."""
self._ipfs_cids[content_hash] = ipfs_cid self._ipfs_cids[cid] = ipfs_cid
if self._redis: if self._redis:
try: try:
self._redis.hset(self._redis_ipfs_key, content_hash, ipfs_cid) self._redis.hset(self._redis_ipfs_key, cid, ipfs_cid)
except Exception as e: except Exception as e:
logger.warning(f"Failed to set IPFS index in Redis: {e}") logger.warning(f"Failed to set IPFS index in Redis: {e}")
self._save_ipfs_index() self._save_ipfs_index()
def _get_ipfs_cid_from_index(self, content_hash: str) -> Optional[str]: def _get_ipfs_cid_from_index(self, cid: str) -> Optional[str]:
"""Get IPFS CID from index (Redis-first, then in-memory).""" """Get IPFS CID from index (Redis-first, then in-memory)."""
if self._redis: if self._redis:
try: try:
val = self._redis.hget(self._redis_ipfs_key, content_hash) val = self._redis.hget(self._redis_ipfs_key, cid)
if val: if val:
return val.decode() if isinstance(val, bytes) else val return val.decode() if isinstance(val, bytes) else val
except Exception as e: except Exception as e:
logger.warning(f"Failed to get IPFS CID from Redis: {e}") logger.warning(f"Failed to get IPFS CID from Redis: {e}")
return self._ipfs_cids.get(content_hash) return self._ipfs_cids.get(cid)
def get_ipfs_cid(self, content_hash: str) -> Optional[str]: def get_ipfs_cid(self, cid: str) -> Optional[str]:
"""Get IPFS CID for a content hash.""" """Get IPFS CID for a content hash."""
return self._get_ipfs_cid_from_index(content_hash) return self._get_ipfs_cid_from_index(cid)
def _is_shared_by_node_id(self, content_hash: str) -> bool: def _is_shared_by_node_id(self, cid: str) -> bool:
"""Check if a content_hash is shared via L2.""" """Check if a cid is shared via L2."""
return self.l2_checker.is_shared(content_hash) return self.l2_checker.is_shared(cid)
def _load_meta(self, content_hash: str) -> dict: def _load_meta(self, cid: str) -> dict:
"""Load metadata for a cached file.""" """Load metadata for a cached file."""
meta_path = self.cache_dir / f"{content_hash}.meta.json" meta_path = self.cache_dir / f"{cid}.meta.json"
if meta_path.exists(): if meta_path.exists():
with open(meta_path) as f: with open(meta_path) as f:
return json.load(f) return json.load(f)
return {} return {}
def is_pinned(self, content_hash: str) -> tuple[bool, str]: def is_pinned(self, cid: str) -> tuple[bool, str]:
""" """
Check if a content_hash is pinned (non-deletable). Check if a cid is pinned (non-deletable).
Returns: Returns:
(is_pinned, reason) tuple (is_pinned, reason) tuple
""" """
meta = self._load_meta(content_hash) meta = self._load_meta(cid)
if meta.get("pinned"): if meta.get("pinned"):
return True, meta.get("pin_reason", "published") return True, meta.get("pin_reason", "published")
return False, "" return False, ""
def _save_meta(self, content_hash: str, **updates) -> dict: def _save_meta(self, cid: str, **updates) -> dict:
"""Save/update metadata for a cached file.""" """Save/update metadata for a cached file."""
meta = self._load_meta(content_hash) meta = self._load_meta(cid)
meta.update(updates) meta.update(updates)
meta_path = self.cache_dir / f"{content_hash}.meta.json" meta_path = self.cache_dir / f"{cid}.meta.json"
with open(meta_path, "w") as f: with open(meta_path, "w") as f:
json.dump(meta, f, indent=2) json.dump(meta, f, indent=2)
return meta return meta
def pin(self, content_hash: str, reason: str = "published") -> None: def pin(self, cid: str, reason: str = "published") -> None:
"""Mark an item as pinned (non-deletable).""" """Mark an item as pinned (non-deletable)."""
self._save_meta(content_hash, pinned=True, pin_reason=reason) self._save_meta(cid, pinned=True, pin_reason=reason)
# ============ File Storage ============ # ============ File Storage ============
@@ -375,31 +375,28 @@ class L1CacheManager:
Args: Args:
source_path: Path to file to cache source_path: Path to file to cache
node_type: Type of node (e.g., "upload", "source", "effect") node_type: Type of node (e.g., "upload", "source", "effect")
node_id: Optional node_id; if not provided, uses content_hash node_id: Optional node_id; if not provided, uses CID
execution_time: How long the operation took execution_time: How long the operation took
move: If True, move instead of copy move: If True, move instead of copy
Returns: Returns:
Tuple of (CachedFile with both node_id and content_hash, IPFS CID or None) Tuple of (CachedFile with both node_id and cid, CID)
""" """
# Compute content hash first # Upload to IPFS first to get the CID (primary identifier)
content_hash = file_hash(source_path) cid = ipfs_client.add_file(source_path)
if not cid:
# Fallback to local hash if IPFS unavailable
cid = file_hash(source_path)
logger.warning(f"IPFS unavailable, using local hash: {cid[:16]}...")
# Use content_hash as node_id if not provided # Use CID as node_id if not provided
# This is for legacy/uploaded files that don't have a DAG node
if node_id is None: if node_id is None:
node_id = content_hash node_id = cid
# Check if already cached (by node_id) # Check if already cached (by node_id)
existing = self.cache.get_entry(node_id) existing = self.cache.get_entry(node_id)
if existing and existing.output_path.exists(): if existing and existing.output_path.exists():
# Already cached - still try to get IPFS CID if we don't have it return CachedFile.from_cache_entry(existing), cid
ipfs_cid = self._get_ipfs_cid_from_index(content_hash)
if not ipfs_cid:
ipfs_cid = ipfs_client.add_file(existing.output_path)
if ipfs_cid:
self._set_ipfs_index(content_hash, ipfs_cid)
return CachedFile.from_cache_entry(existing), ipfs_cid
# Store in local cache # Store in local cache
self.cache.put( self.cache.put(
@@ -412,16 +409,12 @@ class L1CacheManager:
entry = self.cache.get_entry(node_id) entry = self.cache.get_entry(node_id)
# Update content index (Redis + local) # Update content index (CID -> node_id mapping)
self._set_content_index(entry.content_hash, node_id) self._set_content_index(cid, node_id)
# Upload to IPFS (async in background would be better, but sync for now) logger.info(f"Cached: {cid[:16]}...")
ipfs_cid = ipfs_client.add_file(entry.output_path)
if ipfs_cid:
self._set_ipfs_index(entry.content_hash, ipfs_cid)
logger.info(f"Uploaded to IPFS: {entry.content_hash[:16]}... -> {ipfs_cid}")
return CachedFile.from_cache_entry(entry), ipfs_cid return CachedFile.from_cache_entry(entry), cid
def get_by_node_id(self, node_id: str) -> Optional[Path]: def get_by_node_id(self, node_id: str) -> Optional[Path]:
"""Get cached file path by node_id.""" """Get cached file path by node_id."""
@@ -432,46 +425,46 @@ class L1CacheManager:
# CIDv0 starts with "Qm", CIDv1 starts with "bafy" or other multibase prefixes # CIDv0 starts with "Qm", CIDv1 starts with "bafy" or other multibase prefixes
return identifier.startswith("Qm") or identifier.startswith("bafy") or identifier.startswith("baf") return identifier.startswith("Qm") or identifier.startswith("bafy") or identifier.startswith("baf")
def get_by_content_hash(self, content_hash: str) -> Optional[Path]: def get_by_cid(self, cid: str) -> Optional[Path]:
"""Get cached file path by content_hash or IPFS CID. Falls back to IPFS if not in local cache.""" """Get cached file path by cid or IPFS CID. Falls back to IPFS if not in local cache."""
# If it looks like an IPFS CID, use get_by_cid instead # If it looks like an IPFS CID, use get_by_cid instead
if self._is_ipfs_cid(content_hash): if self._is_ipfs_cid(cid):
return self.get_by_cid(content_hash) return self.get_by_cid(cid)
# Check index first (Redis then local) # Check index first (Redis then local)
node_id = self._get_content_index(content_hash) node_id = self._get_content_index(cid)
if node_id: if node_id:
path = self.cache.get(node_id) path = self.cache.get(node_id)
if path and path.exists(): if path and path.exists():
logger.debug(f" Found via index: {path}") logger.debug(f" Found via index: {path}")
return path return path
# For uploads, node_id == content_hash, so try direct lookup # For uploads, node_id == cid, so try direct lookup
# This works even if cache index hasn't been reloaded # This works even if cache index hasn't been reloaded
path = self.cache.get(content_hash) path = self.cache.get(cid)
logger.debug(f" cache.get({content_hash[:16]}...) returned: {path}") logger.debug(f" cache.get({cid[:16]}...) returned: {path}")
if path and path.exists(): if path and path.exists():
self._set_content_index(content_hash, content_hash) self._set_content_index(cid, cid)
return path return path
# Scan cache entries (fallback for new structure) # Scan cache entries (fallback for new structure)
entry = self.cache.find_by_content_hash(content_hash) entry = self.cache.find_by_cid(cid)
if entry and entry.output_path.exists(): if entry and entry.output_path.exists():
logger.debug(f" Found via scan: {entry.output_path}") logger.debug(f" Found via scan: {entry.output_path}")
self._set_content_index(content_hash, entry.node_id) self._set_content_index(cid, entry.node_id)
return entry.output_path return entry.output_path
# Check legacy location (files stored directly as CACHE_DIR/{content_hash}) # Check legacy location (files stored directly as CACHE_DIR/{cid})
legacy_path = self.cache_dir / content_hash legacy_path = self.cache_dir / cid
if legacy_path.exists() and legacy_path.is_file(): if legacy_path.exists() and legacy_path.is_file():
return legacy_path return legacy_path
# Try to recover from IPFS if we have a CID # Try to recover from IPFS if we have a CID
ipfs_cid = self._get_ipfs_cid_from_index(content_hash) ipfs_cid = self._get_ipfs_cid_from_index(cid)
if ipfs_cid: if ipfs_cid:
logger.info(f"Recovering from IPFS: {content_hash[:16]}... ({ipfs_cid})") logger.info(f"Recovering from IPFS: {cid[:16]}... ({ipfs_cid})")
recovery_path = self.legacy_dir / content_hash recovery_path = self.legacy_dir / cid
if ipfs_client.get_file(ipfs_cid, recovery_path): if ipfs_client.get_file(ipfs_cid, recovery_path):
logger.info(f"Recovered from IPFS: {recovery_path}") logger.info(f"Recovered from IPFS: {recovery_path}")
return recovery_path return recovery_path
@@ -504,16 +497,16 @@ class L1CacheManager:
return None return None
def has_content(self, content_hash: str) -> bool: def has_content(self, cid: str) -> bool:
"""Check if content exists in cache.""" """Check if content exists in cache."""
return self.get_by_content_hash(content_hash) is not None return self.get_by_cid(cid) is not None
def get_entry_by_content_hash(self, content_hash: str) -> Optional[CacheEntry]: def get_entry_by_cid(self, cid: str) -> Optional[CacheEntry]:
"""Get cache entry by content_hash.""" """Get cache entry by cid."""
node_id = self._get_content_index(content_hash) node_id = self._get_content_index(cid)
if node_id: if node_id:
return self.cache.get_entry(node_id) return self.cache.get_entry(node_id)
return self.cache.find_by_content_hash(content_hash) return self.cache.find_by_cid(cid)
def list_all(self) -> List[CachedFile]: def list_all(self) -> List[CachedFile]:
"""List all cached files.""" """List all cached files."""
@@ -523,11 +516,11 @@ class L1CacheManager:
# New cache structure entries # New cache structure entries
for entry in self.cache.list_entries(): for entry in self.cache.list_entries():
files.append(CachedFile.from_cache_entry(entry)) files.append(CachedFile.from_cache_entry(entry))
if entry.content_hash: if entry.cid:
seen_hashes.add(entry.content_hash) seen_hashes.add(entry.cid)
# Legacy files stored directly in cache_dir (old structure) # Legacy files stored directly in cache_dir (old structure)
# These are files named by content_hash directly in CACHE_DIR # These are files named by cid directly in CACHE_DIR
for f in self.cache_dir.iterdir(): for f in self.cache_dir.iterdir():
# Skip directories and special files # Skip directories and special files
if not f.is_file(): if not f.is_file():
@@ -544,7 +537,7 @@ class L1CacheManager:
files.append(CachedFile( files.append(CachedFile(
node_id=f.name, node_id=f.name,
content_hash=f.name, cid=f.name,
path=f, path=f,
size_bytes=f.stat().st_size, size_bytes=f.stat().st_size,
node_type="legacy", node_type="legacy",
@@ -566,8 +559,8 @@ class L1CacheManager:
""" """
hashes = [] hashes = []
for entry in self.cache.list_entries(): for entry in self.cache.list_entries():
if entry.node_type == node_type and entry.content_hash: if entry.node_type == node_type and entry.cid:
hashes.append(entry.content_hash) hashes.append(entry.cid)
return hashes return hashes
# ============ Activity Tracking ============ # ============ Activity Tracking ============
@@ -590,19 +583,19 @@ class L1CacheManager:
def record_simple_activity( def record_simple_activity(
self, self,
input_hashes: List[str], input_hashes: List[str],
output_hash: str, output_cid: str,
run_id: str = None, run_id: str = None,
) -> Activity: ) -> Activity:
""" """
Record a simple (non-DAG) execution as an activity. Record a simple (non-DAG) execution as an activity.
For legacy single-effect runs that don't use full DAG execution. For legacy single-effect runs that don't use full DAG execution.
Uses content_hash as node_id. Uses cid as node_id.
""" """
activity = Activity( activity = Activity(
activity_id=run_id or str(hash((tuple(input_hashes), output_hash))), activity_id=run_id or str(hash((tuple(input_hashes), output_cid))),
input_ids=sorted(input_hashes), input_ids=sorted(input_hashes),
output_id=output_hash, output_id=output_cid,
intermediate_ids=[], intermediate_ids=[],
created_at=datetime.now(timezone.utc).timestamp(), created_at=datetime.now(timezone.utc).timestamp(),
status="completed", status="completed",
@@ -624,7 +617,7 @@ class L1CacheManager:
# ============ Deletion Rules ============ # ============ Deletion Rules ============
def can_delete(self, content_hash: str) -> tuple[bool, str]: def can_delete(self, cid: str) -> tuple[bool, str]:
""" """
Check if a cached item can be deleted. Check if a cached item can be deleted.
@@ -632,12 +625,12 @@ class L1CacheManager:
(can_delete, reason) tuple (can_delete, reason) tuple
""" """
# Check if pinned (published or input to published) # Check if pinned (published or input to published)
pinned, reason = self.is_pinned(content_hash) pinned, reason = self.is_pinned(cid)
if pinned: if pinned:
return False, f"Item is pinned ({reason})" return False, f"Item is pinned ({reason})"
# Find node_id for this content # Find node_id for this content
node_id = self._get_content_index(content_hash) or content_hash node_id = self._get_content_index(cid) or cid
# Check if it's an input or output of any activity # Check if it's an input or output of any activity
for activity in self.activity_store.list(): for activity in self.activity_store.list():
@@ -663,34 +656,34 @@ class L1CacheManager:
for node_id in activity.all_node_ids: for node_id in activity.all_node_ids:
entry = self.cache.get_entry(node_id) entry = self.cache.get_entry(node_id)
if entry: if entry:
pinned, reason = self.is_pinned(entry.content_hash) pinned, reason = self.is_pinned(entry.cid)
if pinned: if pinned:
return False, f"Item {node_id} is pinned ({reason})" return False, f"Item {node_id} is pinned ({reason})"
return True, "OK" return True, "OK"
def delete_by_content_hash(self, content_hash: str) -> tuple[bool, str]: def delete_by_cid(self, cid: str) -> tuple[bool, str]:
""" """
Delete a cached item by content_hash. Delete a cached item by cid.
Enforces deletion rules. Enforces deletion rules.
Returns: Returns:
(success, message) tuple (success, message) tuple
""" """
can_delete, reason = self.can_delete(content_hash) can_delete, reason = self.can_delete(cid)
if not can_delete: if not can_delete:
return False, reason return False, reason
# Find and delete # Find and delete
node_id = self._get_content_index(content_hash) node_id = self._get_content_index(cid)
if node_id: if node_id:
self.cache.remove(node_id) self.cache.remove(node_id)
self._del_content_index(content_hash) self._del_content_index(cid)
return True, "Deleted" return True, "Deleted"
# Try legacy # Try legacy
legacy_path = self.legacy_dir / content_hash legacy_path = self.legacy_dir / cid
if legacy_path.exists(): if legacy_path.exists():
legacy_path.unlink() legacy_path.unlink()
return True, "Deleted (legacy)" return True, "Deleted (legacy)"
@@ -732,7 +725,7 @@ class L1CacheManager:
if activity.output_id: if activity.output_id:
entry = self.cache.get_entry(activity.output_id) entry = self.cache.get_entry(activity.output_id)
if entry: if entry:
pinned, reason = self.is_pinned(entry.content_hash) pinned, reason = self.is_pinned(entry.cid)
if pinned: if pinned:
return False, f"Output is pinned ({reason})" return False, f"Output is pinned ({reason})"
@@ -743,9 +736,9 @@ class L1CacheManager:
# Remove from cache # Remove from cache
self.cache.remove(activity.output_id) self.cache.remove(activity.output_id)
# Remove from content index (Redis + local) # Remove from content index (Redis + local)
self._del_content_index(entry.content_hash) self._del_content_index(entry.cid)
# Delete from legacy dir if exists # Delete from legacy dir if exists
legacy_path = self.legacy_dir / entry.content_hash legacy_path = self.legacy_dir / entry.cid
if legacy_path.exists(): if legacy_path.exists():
legacy_path.unlink() legacy_path.unlink()
@@ -754,8 +747,8 @@ class L1CacheManager:
entry = self.cache.get_entry(node_id) entry = self.cache.get_entry(node_id)
if entry: if entry:
self.cache.remove(node_id) self.cache.remove(node_id)
self._del_content_index(entry.content_hash) self._del_content_index(entry.cid)
legacy_path = self.legacy_dir / entry.content_hash legacy_path = self.legacy_dir / entry.cid
if legacy_path.exists(): if legacy_path.exists():
legacy_path.unlink() legacy_path.unlink()
@@ -777,13 +770,13 @@ class L1CacheManager:
# ============ L2 Integration ============ # ============ L2 Integration ============
def mark_published(self, content_hash: str): def mark_published(self, cid: str):
"""Mark a content_hash as published to L2.""" """Mark a cid as published to L2."""
self.l2_checker.mark_shared(content_hash) self.l2_checker.mark_shared(cid)
def invalidate_shared_cache(self, content_hash: str): def invalidate_shared_cache(self, cid: str):
"""Invalidate shared status cache (call if item might be unpublished).""" """Invalidate shared status cache (call if item might be unpublished)."""
self.l2_checker.invalidate(content_hash) self.l2_checker.invalidate(cid)
# ============ Stats ============ # ============ Stats ============

View File

@@ -19,7 +19,7 @@ SCHEMA_SQL = """
-- Core cache: just content hash and IPFS CID -- Core cache: just content hash and IPFS CID
-- Physical file storage - shared by all users -- Physical file storage - shared by all users
CREATE TABLE IF NOT EXISTS cache_items ( CREATE TABLE IF NOT EXISTS cache_items (
content_hash VARCHAR(64) PRIMARY KEY, cid VARCHAR(64) PRIMARY KEY,
ipfs_cid VARCHAR(128), ipfs_cid VARCHAR(128),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
); );
@@ -28,7 +28,7 @@ CREATE TABLE IF NOT EXISTS cache_items (
-- actor_id format: @username@server (ActivityPub style) -- actor_id format: @username@server (ActivityPub style)
CREATE TABLE IF NOT EXISTS item_types ( CREATE TABLE IF NOT EXISTS item_types (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,
content_hash VARCHAR(64) REFERENCES cache_items(content_hash) ON DELETE CASCADE, cid VARCHAR(64) REFERENCES cache_items(cid) ON DELETE CASCADE,
actor_id VARCHAR(255) NOT NULL, actor_id VARCHAR(255) NOT NULL,
type VARCHAR(50) NOT NULL, type VARCHAR(50) NOT NULL,
path VARCHAR(255), path VARCHAR(255),
@@ -40,7 +40,7 @@ CREATE TABLE IF NOT EXISTS item_types (
filename VARCHAR(255), filename VARCHAR(255),
metadata JSONB DEFAULT '{}', metadata JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
UNIQUE(content_hash, actor_id, type, path) UNIQUE(cid, actor_id, type, path)
); );
-- Add columns if they don't exist (for existing databases) -- Add columns if they don't exist (for existing databases)
@@ -61,7 +61,7 @@ CREATE TABLE IF NOT EXISTS pin_reasons (
-- L2 shares: per-user shares (includes content_type for role when shared) -- L2 shares: per-user shares (includes content_type for role when shared)
CREATE TABLE IF NOT EXISTS l2_shares ( CREATE TABLE IF NOT EXISTS l2_shares (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,
content_hash VARCHAR(64) REFERENCES cache_items(content_hash) ON DELETE CASCADE, cid VARCHAR(64) REFERENCES cache_items(cid) ON DELETE CASCADE,
actor_id VARCHAR(255) NOT NULL, actor_id VARCHAR(255) NOT NULL,
l2_server VARCHAR(255) NOT NULL, l2_server VARCHAR(255) NOT NULL,
asset_name VARCHAR(255) NOT NULL, asset_name VARCHAR(255) NOT NULL,
@@ -69,7 +69,7 @@ CREATE TABLE IF NOT EXISTS l2_shares (
content_type VARCHAR(50) NOT NULL, content_type VARCHAR(50) NOT NULL,
published_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), published_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
last_synced_at TIMESTAMP WITH TIME ZONE, last_synced_at TIMESTAMP WITH TIME ZONE,
UNIQUE(content_hash, actor_id, l2_server, content_type) UNIQUE(cid, actor_id, l2_server, content_type)
); );
-- Add activity_id column if it doesn't exist (for existing databases) -- Add activity_id column if it doesn't exist (for existing databases)
@@ -82,7 +82,7 @@ END $$;
-- run_id is a hash of (sorted inputs + recipe), making runs deterministic -- run_id is a hash of (sorted inputs + recipe), making runs deterministic
CREATE TABLE IF NOT EXISTS run_cache ( CREATE TABLE IF NOT EXISTS run_cache (
run_id VARCHAR(64) PRIMARY KEY, run_id VARCHAR(64) PRIMARY KEY,
output_hash VARCHAR(64) NOT NULL, output_cid VARCHAR(64) NOT NULL,
ipfs_cid VARCHAR(128), ipfs_cid VARCHAR(128),
provenance_cid VARCHAR(128), provenance_cid VARCHAR(128),
recipe VARCHAR(255) NOT NULL, recipe VARCHAR(255) NOT NULL,
@@ -128,27 +128,27 @@ CREATE TABLE IF NOT EXISTS storage_backends (
-- Storage pins tracking (what's pinned where) -- Storage pins tracking (what's pinned where)
CREATE TABLE IF NOT EXISTS storage_pins ( CREATE TABLE IF NOT EXISTS storage_pins (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,
content_hash VARCHAR(64) NOT NULL, cid VARCHAR(64) NOT NULL,
storage_id INTEGER NOT NULL REFERENCES storage_backends(id) ON DELETE CASCADE, storage_id INTEGER NOT NULL REFERENCES storage_backends(id) ON DELETE CASCADE,
ipfs_cid VARCHAR(128), ipfs_cid VARCHAR(128),
pin_type VARCHAR(20) NOT NULL, -- 'user_content', 'donated', 'system' pin_type VARCHAR(20) NOT NULL, -- 'user_content', 'donated', 'system'
size_bytes BIGINT, size_bytes BIGINT,
pinned_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), pinned_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
UNIQUE(content_hash, storage_id) UNIQUE(cid, storage_id)
); );
-- Indexes -- Indexes
CREATE INDEX IF NOT EXISTS idx_item_types_content_hash ON item_types(content_hash); CREATE INDEX IF NOT EXISTS idx_item_types_cid ON item_types(cid);
CREATE INDEX IF NOT EXISTS idx_item_types_actor_id ON item_types(actor_id); CREATE INDEX IF NOT EXISTS idx_item_types_actor_id ON item_types(actor_id);
CREATE INDEX IF NOT EXISTS idx_item_types_type ON item_types(type); CREATE INDEX IF NOT EXISTS idx_item_types_type ON item_types(type);
CREATE INDEX IF NOT EXISTS idx_item_types_path ON item_types(path); CREATE INDEX IF NOT EXISTS idx_item_types_path ON item_types(path);
CREATE INDEX IF NOT EXISTS idx_pin_reasons_item_type ON pin_reasons(item_type_id); CREATE INDEX IF NOT EXISTS idx_pin_reasons_item_type ON pin_reasons(item_type_id);
CREATE INDEX IF NOT EXISTS idx_l2_shares_content_hash ON l2_shares(content_hash); CREATE INDEX IF NOT EXISTS idx_l2_shares_cid ON l2_shares(cid);
CREATE INDEX IF NOT EXISTS idx_l2_shares_actor_id ON l2_shares(actor_id); CREATE INDEX IF NOT EXISTS idx_l2_shares_actor_id ON l2_shares(actor_id);
CREATE INDEX IF NOT EXISTS idx_run_cache_output ON run_cache(output_hash); CREATE INDEX IF NOT EXISTS idx_run_cache_output ON run_cache(output_cid);
CREATE INDEX IF NOT EXISTS idx_storage_backends_actor ON storage_backends(actor_id); CREATE INDEX IF NOT EXISTS idx_storage_backends_actor ON storage_backends(actor_id);
CREATE INDEX IF NOT EXISTS idx_storage_backends_type ON storage_backends(provider_type); CREATE INDEX IF NOT EXISTS idx_storage_backends_type ON storage_backends(provider_type);
CREATE INDEX IF NOT EXISTS idx_storage_pins_hash ON storage_pins(content_hash); CREATE INDEX IF NOT EXISTS idx_storage_pins_hash ON storage_pins(cid);
CREATE INDEX IF NOT EXISTS idx_storage_pins_storage ON storage_pins(storage_id); CREATE INDEX IF NOT EXISTS idx_storage_pins_storage ON storage_pins(storage_id);
""" """
@@ -171,47 +171,47 @@ async def close_db():
# ============ Cache Items ============ # ============ Cache Items ============
async def create_cache_item(content_hash: str, ipfs_cid: Optional[str] = None) -> dict: async def create_cache_item(cid: str, ipfs_cid: Optional[str] = None) -> dict:
"""Create a cache item. Returns the created item.""" """Create a cache item. Returns the created item."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
row = await conn.fetchrow( row = await conn.fetchrow(
""" """
INSERT INTO cache_items (content_hash, ipfs_cid) INSERT INTO cache_items (cid, ipfs_cid)
VALUES ($1, $2) VALUES ($1, $2)
ON CONFLICT (content_hash) DO UPDATE SET ipfs_cid = COALESCE($2, cache_items.ipfs_cid) ON CONFLICT (cid) DO UPDATE SET ipfs_cid = COALESCE($2, cache_items.ipfs_cid)
RETURNING content_hash, ipfs_cid, created_at RETURNING cid, ipfs_cid, created_at
""", """,
content_hash, ipfs_cid cid, ipfs_cid
) )
return dict(row) return dict(row)
async def get_cache_item(content_hash: str) -> Optional[dict]: async def get_cache_item(cid: str) -> Optional[dict]:
"""Get a cache item by content hash.""" """Get a cache item by content hash."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
row = await conn.fetchrow( row = await conn.fetchrow(
"SELECT content_hash, ipfs_cid, created_at FROM cache_items WHERE content_hash = $1", "SELECT cid, ipfs_cid, created_at FROM cache_items WHERE cid = $1",
content_hash cid
) )
return dict(row) if row else None return dict(row) if row else None
async def update_cache_item_ipfs_cid(content_hash: str, ipfs_cid: str) -> bool: async def update_cache_item_ipfs_cid(cid: str, ipfs_cid: str) -> bool:
"""Update the IPFS CID for a cache item.""" """Update the IPFS CID for a cache item."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
result = await conn.execute( result = await conn.execute(
"UPDATE cache_items SET ipfs_cid = $2 WHERE content_hash = $1", "UPDATE cache_items SET ipfs_cid = $2 WHERE cid = $1",
content_hash, ipfs_cid cid, ipfs_cid
) )
return result == "UPDATE 1" return result == "UPDATE 1"
async def delete_cache_item(content_hash: str) -> bool: async def delete_cache_item(cid: str) -> bool:
"""Delete a cache item and all associated data (cascades).""" """Delete a cache item and all associated data (cascades)."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
result = await conn.execute( result = await conn.execute(
"DELETE FROM cache_items WHERE content_hash = $1", "DELETE FROM cache_items WHERE cid = $1",
content_hash cid
) )
return result == "DELETE 1" return result == "DELETE 1"
@@ -221,7 +221,7 @@ async def list_cache_items(limit: int = 100, offset: int = 0) -> List[dict]:
async with pool.acquire() as conn: async with pool.acquire() as conn:
rows = await conn.fetch( rows = await conn.fetch(
""" """
SELECT content_hash, ipfs_cid, created_at SELECT cid, ipfs_cid, created_at
FROM cache_items FROM cache_items
ORDER BY created_at DESC ORDER BY created_at DESC
LIMIT $1 OFFSET $2 LIMIT $1 OFFSET $2
@@ -234,7 +234,7 @@ async def list_cache_items(limit: int = 100, offset: int = 0) -> List[dict]:
# ============ Item Types ============ # ============ Item Types ============
async def add_item_type( async def add_item_type(
content_hash: str, cid: str,
actor_id: str, actor_id: str,
item_type: str, item_type: str,
path: Optional[str] = None, path: Optional[str] = None,
@@ -247,72 +247,72 @@ async def add_item_type(
async with pool.acquire() as conn: async with pool.acquire() as conn:
# Ensure cache_item exists # Ensure cache_item exists
await conn.execute( await conn.execute(
"INSERT INTO cache_items (content_hash) VALUES ($1) ON CONFLICT DO NOTHING", "INSERT INTO cache_items (cid) VALUES ($1) ON CONFLICT DO NOTHING",
content_hash cid
) )
# Insert or update item_type # Insert or update item_type
row = await conn.fetchrow( row = await conn.fetchrow(
""" """
INSERT INTO item_types (content_hash, actor_id, type, path, description, source_type, source_url, source_note) INSERT INTO item_types (cid, actor_id, type, path, description, source_type, source_url, source_note)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (content_hash, actor_id, type, path) DO UPDATE SET ON CONFLICT (cid, actor_id, type, path) DO UPDATE SET
description = COALESCE($5, item_types.description), description = COALESCE($5, item_types.description),
source_type = COALESCE($6, item_types.source_type), source_type = COALESCE($6, item_types.source_type),
source_url = COALESCE($7, item_types.source_url), source_url = COALESCE($7, item_types.source_url),
source_note = COALESCE($8, item_types.source_note) source_note = COALESCE($8, item_types.source_note)
RETURNING id, content_hash, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at RETURNING id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
""", """,
content_hash, actor_id, item_type, path, description, source_type, source_url, source_note cid, actor_id, item_type, path, description, source_type, source_url, source_note
) )
return dict(row) return dict(row)
async def get_item_types(content_hash: str, actor_id: Optional[str] = None) -> List[dict]: async def get_item_types(cid: str, actor_id: Optional[str] = None) -> List[dict]:
"""Get types for a cache item, optionally filtered by user.""" """Get types for a cache item, optionally filtered by user."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
if actor_id: if actor_id:
rows = await conn.fetch( rows = await conn.fetch(
""" """
SELECT id, content_hash, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types FROM item_types
WHERE content_hash = $1 AND actor_id = $2 WHERE cid = $1 AND actor_id = $2
ORDER BY created_at ORDER BY created_at
""", """,
content_hash, actor_id cid, actor_id
) )
else: else:
rows = await conn.fetch( rows = await conn.fetch(
""" """
SELECT id, content_hash, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types FROM item_types
WHERE content_hash = $1 WHERE cid = $1
ORDER BY created_at ORDER BY created_at
""", """,
content_hash cid
) )
return [dict(row) for row in rows] return [dict(row) for row in rows]
async def get_item_type(content_hash: str, actor_id: str, item_type: str, path: Optional[str] = None) -> Optional[dict]: async def get_item_type(cid: str, actor_id: str, item_type: str, path: Optional[str] = None) -> Optional[dict]:
"""Get a specific type for a cache item and user.""" """Get a specific type for a cache item and user."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
if path is None: if path is None:
row = await conn.fetchrow( row = await conn.fetchrow(
""" """
SELECT id, content_hash, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types FROM item_types
WHERE content_hash = $1 AND actor_id = $2 AND type = $3 AND path IS NULL WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
""", """,
content_hash, actor_id, item_type cid, actor_id, item_type
) )
else: else:
row = await conn.fetchrow( row = await conn.fetchrow(
""" """
SELECT id, content_hash, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at SELECT id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, created_at
FROM item_types FROM item_types
WHERE content_hash = $1 AND actor_id = $2 AND type = $3 AND path = $4 WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path = $4
""", """,
content_hash, actor_id, item_type, path cid, actor_id, item_type, path
) )
return dict(row) if row else None return dict(row) if row else None
@@ -340,18 +340,18 @@ async def update_item_type(
return result == "UPDATE 1" return result == "UPDATE 1"
async def delete_item_type(content_hash: str, actor_id: str, item_type: str, path: Optional[str] = None) -> bool: async def delete_item_type(cid: str, actor_id: str, item_type: str, path: Optional[str] = None) -> bool:
"""Delete a specific type from a cache item for a user.""" """Delete a specific type from a cache item for a user."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
if path is None: if path is None:
result = await conn.execute( result = await conn.execute(
"DELETE FROM item_types WHERE content_hash = $1 AND actor_id = $2 AND type = $3 AND path IS NULL", "DELETE FROM item_types WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL",
content_hash, actor_id, item_type cid, actor_id, item_type
) )
else: else:
result = await conn.execute( result = await conn.execute(
"DELETE FROM item_types WHERE content_hash = $1 AND actor_id = $2 AND type = $3 AND path = $4", "DELETE FROM item_types WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path = $4",
content_hash, actor_id, item_type, path cid, actor_id, item_type, path
) )
return result == "DELETE 1" return result == "DELETE 1"
@@ -362,11 +362,11 @@ async def list_items_by_type(item_type: str, actor_id: Optional[str] = None, lim
if actor_id: if actor_id:
rows = await conn.fetch( rows = await conn.fetch(
""" """
SELECT it.id, it.content_hash, it.actor_id, it.type, it.path, it.description, SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at, it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid ci.ipfs_cid
FROM item_types it FROM item_types it
JOIN cache_items ci ON it.content_hash = ci.content_hash JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1 AND it.actor_id = $2 WHERE it.type = $1 AND it.actor_id = $2
ORDER BY it.created_at DESC ORDER BY it.created_at DESC
LIMIT $3 OFFSET $4 LIMIT $3 OFFSET $4
@@ -376,11 +376,11 @@ async def list_items_by_type(item_type: str, actor_id: Optional[str] = None, lim
else: else:
rows = await conn.fetch( rows = await conn.fetch(
""" """
SELECT it.id, it.content_hash, it.actor_id, it.type, it.path, it.description, SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at, it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid ci.ipfs_cid
FROM item_types it FROM item_types it
JOIN cache_items ci ON it.content_hash = ci.content_hash JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1 WHERE it.type = $1
ORDER BY it.created_at DESC ORDER BY it.created_at DESC
LIMIT $2 OFFSET $3 LIMIT $2 OFFSET $3
@@ -396,11 +396,11 @@ async def get_item_by_path(item_type: str, path: str, actor_id: Optional[str] =
if actor_id: if actor_id:
row = await conn.fetchrow( row = await conn.fetchrow(
""" """
SELECT it.id, it.content_hash, it.actor_id, it.type, it.path, it.description, SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at, it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid ci.ipfs_cid
FROM item_types it FROM item_types it
JOIN cache_items ci ON it.content_hash = ci.content_hash JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1 AND it.path = $2 AND it.actor_id = $3 WHERE it.type = $1 AND it.path = $2 AND it.actor_id = $3
""", """,
item_type, path, actor_id item_type, path, actor_id
@@ -408,11 +408,11 @@ async def get_item_by_path(item_type: str, path: str, actor_id: Optional[str] =
else: else:
row = await conn.fetchrow( row = await conn.fetchrow(
""" """
SELECT it.id, it.content_hash, it.actor_id, it.type, it.path, it.description, SELECT it.id, it.cid, it.actor_id, it.type, it.path, it.description,
it.source_type, it.source_url, it.source_note, it.pinned, it.created_at, it.source_type, it.source_url, it.source_note, it.pinned, it.created_at,
ci.ipfs_cid ci.ipfs_cid
FROM item_types it FROM item_types it
JOIN cache_items ci ON it.content_hash = ci.content_hash JOIN cache_items ci ON it.cid = ci.cid
WHERE it.type = $1 AND it.path = $2 WHERE it.type = $1 AND it.path = $2
""", """,
item_type, path item_type, path
@@ -480,7 +480,7 @@ async def get_pin_reasons(item_type_id: int) -> List[dict]:
return [dict(row) for row in rows] return [dict(row) for row in rows]
async def is_item_pinned(content_hash: str, item_type: Optional[str] = None) -> tuple[bool, List[str]]: async def is_item_pinned(cid: str, item_type: Optional[str] = None) -> tuple[bool, List[str]]:
"""Check if any type of a cache item is pinned. Returns (is_pinned, reasons).""" """Check if any type of a cache item is pinned. Returns (is_pinned, reasons)."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
if item_type: if item_type:
@@ -489,9 +489,9 @@ async def is_item_pinned(content_hash: str, item_type: Optional[str] = None) ->
SELECT pr.reason SELECT pr.reason
FROM pin_reasons pr FROM pin_reasons pr
JOIN item_types it ON pr.item_type_id = it.id JOIN item_types it ON pr.item_type_id = it.id
WHERE it.content_hash = $1 AND it.type = $2 AND it.pinned = TRUE WHERE it.cid = $1 AND it.type = $2 AND it.pinned = TRUE
""", """,
content_hash, item_type cid, item_type
) )
else: else:
rows = await conn.fetch( rows = await conn.fetch(
@@ -499,9 +499,9 @@ async def is_item_pinned(content_hash: str, item_type: Optional[str] = None) ->
SELECT pr.reason SELECT pr.reason
FROM pin_reasons pr FROM pin_reasons pr
JOIN item_types it ON pr.item_type_id = it.id JOIN item_types it ON pr.item_type_id = it.id
WHERE it.content_hash = $1 AND it.pinned = TRUE WHERE it.cid = $1 AND it.pinned = TRUE
""", """,
content_hash cid
) )
reasons = [row["reason"] for row in rows] reasons = [row["reason"] for row in rows]
return len(reasons) > 0, reasons return len(reasons) > 0, reasons
@@ -510,7 +510,7 @@ async def is_item_pinned(content_hash: str, item_type: Optional[str] = None) ->
# ============ L2 Shares ============ # ============ L2 Shares ============
async def add_l2_share( async def add_l2_share(
content_hash: str, cid: str,
actor_id: str, actor_id: str,
l2_server: str, l2_server: str,
asset_name: str, asset_name: str,
@@ -520,85 +520,85 @@ async def add_l2_share(
async with pool.acquire() as conn: async with pool.acquire() as conn:
row = await conn.fetchrow( row = await conn.fetchrow(
""" """
INSERT INTO l2_shares (content_hash, actor_id, l2_server, asset_name, content_type, last_synced_at) INSERT INTO l2_shares (cid, actor_id, l2_server, asset_name, content_type, last_synced_at)
VALUES ($1, $2, $3, $4, $5, NOW()) VALUES ($1, $2, $3, $4, $5, NOW())
ON CONFLICT (content_hash, actor_id, l2_server, content_type) DO UPDATE SET ON CONFLICT (cid, actor_id, l2_server, content_type) DO UPDATE SET
asset_name = $4, asset_name = $4,
last_synced_at = NOW() last_synced_at = NOW()
RETURNING id, content_hash, actor_id, l2_server, asset_name, content_type, published_at, last_synced_at RETURNING id, cid, actor_id, l2_server, asset_name, content_type, published_at, last_synced_at
""", """,
content_hash, actor_id, l2_server, asset_name, content_type cid, actor_id, l2_server, asset_name, content_type
) )
return dict(row) return dict(row)
async def get_l2_shares(content_hash: str, actor_id: Optional[str] = None) -> List[dict]: async def get_l2_shares(cid: str, actor_id: Optional[str] = None) -> List[dict]:
"""Get L2 shares for a cache item, optionally filtered by user.""" """Get L2 shares for a cache item, optionally filtered by user."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
if actor_id: if actor_id:
rows = await conn.fetch( rows = await conn.fetch(
""" """
SELECT id, content_hash, actor_id, l2_server, asset_name, activity_id, content_type, published_at, last_synced_at SELECT id, cid, actor_id, l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares FROM l2_shares
WHERE content_hash = $1 AND actor_id = $2 WHERE cid = $1 AND actor_id = $2
ORDER BY published_at ORDER BY published_at
""", """,
content_hash, actor_id cid, actor_id
) )
else: else:
rows = await conn.fetch( rows = await conn.fetch(
""" """
SELECT id, content_hash, actor_id, l2_server, asset_name, activity_id, content_type, published_at, last_synced_at SELECT id, cid, actor_id, l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares FROM l2_shares
WHERE content_hash = $1 WHERE cid = $1
ORDER BY published_at ORDER BY published_at
""", """,
content_hash cid
) )
return [dict(row) for row in rows] return [dict(row) for row in rows]
async def delete_l2_share(content_hash: str, actor_id: str, l2_server: str, content_type: str) -> bool: async def delete_l2_share(cid: str, actor_id: str, l2_server: str, content_type: str) -> bool:
"""Delete an L2 share for a user.""" """Delete an L2 share for a user."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
result = await conn.execute( result = await conn.execute(
"DELETE FROM l2_shares WHERE content_hash = $1 AND actor_id = $2 AND l2_server = $3 AND content_type = $4", "DELETE FROM l2_shares WHERE cid = $1 AND actor_id = $2 AND l2_server = $3 AND content_type = $4",
content_hash, actor_id, l2_server, content_type cid, actor_id, l2_server, content_type
) )
return result == "DELETE 1" return result == "DELETE 1"
# ============ Cache Item Cleanup ============ # ============ Cache Item Cleanup ============
async def has_remaining_references(content_hash: str) -> bool: async def has_remaining_references(cid: str) -> bool:
"""Check if a cache item has any remaining item_types or l2_shares.""" """Check if a cache item has any remaining item_types or l2_shares."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
item_types_count = await conn.fetchval( item_types_count = await conn.fetchval(
"SELECT COUNT(*) FROM item_types WHERE content_hash = $1", "SELECT COUNT(*) FROM item_types WHERE cid = $1",
content_hash cid
) )
if item_types_count > 0: if item_types_count > 0:
return True return True
l2_shares_count = await conn.fetchval( l2_shares_count = await conn.fetchval(
"SELECT COUNT(*) FROM l2_shares WHERE content_hash = $1", "SELECT COUNT(*) FROM l2_shares WHERE cid = $1",
content_hash cid
) )
return l2_shares_count > 0 return l2_shares_count > 0
async def cleanup_orphaned_cache_item(content_hash: str) -> bool: async def cleanup_orphaned_cache_item(cid: str) -> bool:
"""Delete a cache item if it has no remaining references. Returns True if deleted.""" """Delete a cache item if it has no remaining references. Returns True if deleted."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
# Only delete if no item_types or l2_shares reference it # Only delete if no item_types or l2_shares reference it
result = await conn.execute( result = await conn.execute(
""" """
DELETE FROM cache_items DELETE FROM cache_items
WHERE content_hash = $1 WHERE cid = $1
AND NOT EXISTS (SELECT 1 FROM item_types WHERE content_hash = $1) AND NOT EXISTS (SELECT 1 FROM item_types WHERE cid = $1)
AND NOT EXISTS (SELECT 1 FROM l2_shares WHERE content_hash = $1) AND NOT EXISTS (SELECT 1 FROM l2_shares WHERE cid = $1)
""", """,
content_hash cid
) )
return result == "DELETE 1" return result == "DELETE 1"
@@ -610,7 +610,7 @@ import json as _json
async def save_item_metadata( async def save_item_metadata(
content_hash: str, cid: str,
actor_id: str, actor_id: str,
item_type: str = "media", item_type: str = "media",
filename: Optional[str] = None, filename: Optional[str] = None,
@@ -643,16 +643,16 @@ async def save_item_metadata(
async with pool.acquire() as conn: async with pool.acquire() as conn:
# Ensure cache_item exists # Ensure cache_item exists
await conn.execute( await conn.execute(
"INSERT INTO cache_items (content_hash) VALUES ($1) ON CONFLICT DO NOTHING", "INSERT INTO cache_items (cid) VALUES ($1) ON CONFLICT DO NOTHING",
content_hash cid
) )
# Upsert item_type # Upsert item_type
row = await conn.fetchrow( row = await conn.fetchrow(
""" """
INSERT INTO item_types (content_hash, actor_id, type, description, source_type, source_url, source_note, pinned, filename, metadata) INSERT INTO item_types (cid, actor_id, type, description, source_type, source_url, source_note, pinned, filename, metadata)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
ON CONFLICT (content_hash, actor_id, type, path) DO UPDATE SET ON CONFLICT (cid, actor_id, type, path) DO UPDATE SET
description = COALESCE(EXCLUDED.description, item_types.description), description = COALESCE(EXCLUDED.description, item_types.description),
source_type = COALESCE(EXCLUDED.source_type, item_types.source_type), source_type = COALESCE(EXCLUDED.source_type, item_types.source_type),
source_url = COALESCE(EXCLUDED.source_url, item_types.source_url), source_url = COALESCE(EXCLUDED.source_url, item_types.source_url),
@@ -660,9 +660,9 @@ async def save_item_metadata(
pinned = EXCLUDED.pinned, pinned = EXCLUDED.pinned,
filename = COALESCE(EXCLUDED.filename, item_types.filename), filename = COALESCE(EXCLUDED.filename, item_types.filename),
metadata = item_types.metadata || EXCLUDED.metadata metadata = item_types.metadata || EXCLUDED.metadata
RETURNING id, content_hash, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at RETURNING id, cid, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at
""", """,
content_hash, actor_id, item_type, description, source_type, source_url, source_note, pinned, filename, _json.dumps(metadata) cid, actor_id, item_type, description, source_type, source_url, source_note, pinned, filename, _json.dumps(metadata)
) )
item_type_id = row["id"] item_type_id = row["id"]
@@ -719,7 +719,7 @@ async def save_item_metadata(
return result return result
async def load_item_metadata(content_hash: str, actor_id: Optional[str] = None) -> dict: async def load_item_metadata(cid: str, actor_id: Optional[str] = None) -> dict:
""" """
Load item metadata from the database. Load item metadata from the database.
@@ -731,8 +731,8 @@ async def load_item_metadata(content_hash: str, actor_id: Optional[str] = None)
async with pool.acquire() as conn: async with pool.acquire() as conn:
# Get cache item # Get cache item
cache_item = await conn.fetchrow( cache_item = await conn.fetchrow(
"SELECT content_hash, ipfs_cid, created_at FROM cache_items WHERE content_hash = $1", "SELECT cid, ipfs_cid, created_at FROM cache_items WHERE cid = $1",
content_hash cid
) )
if not cache_item: if not cache_item:
@@ -743,19 +743,19 @@ async def load_item_metadata(content_hash: str, actor_id: Optional[str] = None)
item_types = await conn.fetch( item_types = await conn.fetch(
""" """
SELECT id, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at SELECT id, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at
FROM item_types WHERE content_hash = $1 AND actor_id = $2 FROM item_types WHERE cid = $1 AND actor_id = $2
ORDER BY created_at ORDER BY created_at
""", """,
content_hash, actor_id cid, actor_id
) )
else: else:
item_types = await conn.fetch( item_types = await conn.fetch(
""" """
SELECT id, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at SELECT id, actor_id, type, path, description, source_type, source_url, source_note, pinned, filename, metadata, created_at
FROM item_types WHERE content_hash = $1 FROM item_types WHERE cid = $1
ORDER BY created_at ORDER BY created_at
""", """,
content_hash cid
) )
if not item_types: if not item_types:
@@ -807,17 +807,17 @@ async def load_item_metadata(content_hash: str, actor_id: Optional[str] = None)
shares = await conn.fetch( shares = await conn.fetch(
""" """
SELECT l2_server, asset_name, activity_id, content_type, published_at, last_synced_at SELECT l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares WHERE content_hash = $1 AND actor_id = $2 FROM l2_shares WHERE cid = $1 AND actor_id = $2
""", """,
content_hash, actor_id cid, actor_id
) )
else: else:
shares = await conn.fetch( shares = await conn.fetch(
""" """
SELECT l2_server, asset_name, activity_id, content_type, published_at, last_synced_at SELECT l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
FROM l2_shares WHERE content_hash = $1 FROM l2_shares WHERE cid = $1
""", """,
content_hash cid
) )
if shares: if shares:
@@ -845,7 +845,7 @@ async def load_item_metadata(content_hash: str, actor_id: Optional[str] = None)
async def update_item_metadata( async def update_item_metadata(
content_hash: str, cid: str,
actor_id: str, actor_id: str,
item_type: str = "media", item_type: str = "media",
**updates **updates
@@ -880,15 +880,15 @@ async def update_item_metadata(
existing = await conn.fetchrow( existing = await conn.fetchrow(
""" """
SELECT id, metadata FROM item_types SELECT id, metadata FROM item_types
WHERE content_hash = $1 AND actor_id = $2 AND type = $3 AND path IS NULL WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
""", """,
content_hash, actor_id, item_type cid, actor_id, item_type
) )
if not existing: if not existing:
# Create new entry # Create new entry
return await save_item_metadata( return await save_item_metadata(
content_hash, actor_id, item_type, cid, actor_id, item_type,
filename=filename, description=description, filename=filename, description=description,
source_type=source_type, source_url=source_url, source_note=source_note, source_type=source_type, source_url=source_url, source_note=source_note,
pinned=pinned or False, pin_reason=pin_reason, pinned=pinned or False, pin_reason=pin_reason,
@@ -898,7 +898,7 @@ async def update_item_metadata(
# Build update query dynamically # Build update query dynamically
set_parts = [] set_parts = []
params = [content_hash, actor_id, item_type] params = [cid, actor_id, item_type]
param_idx = 4 param_idx = 4
if description is not None: if description is not None:
@@ -949,7 +949,7 @@ async def update_item_metadata(
if set_parts: if set_parts:
query = f""" query = f"""
UPDATE item_types SET {', '.join(set_parts)} UPDATE item_types SET {', '.join(set_parts)}
WHERE content_hash = $1 AND actor_id = $2 AND type = $3 AND path IS NULL WHERE cid = $1 AND actor_id = $2 AND type = $3 AND path IS NULL
""" """
await conn.execute(query, *params) await conn.execute(query, *params)
@@ -964,11 +964,11 @@ async def update_item_metadata(
existing["id"], pin_reason existing["id"], pin_reason
) )
return await load_item_metadata(content_hash, actor_id) return await load_item_metadata(cid, actor_id)
async def save_l2_share( async def save_l2_share(
content_hash: str, cid: str,
actor_id: str, actor_id: str,
l2_server: str, l2_server: str,
asset_name: str, asset_name: str,
@@ -979,15 +979,15 @@ async def save_l2_share(
async with pool.acquire() as conn: async with pool.acquire() as conn:
row = await conn.fetchrow( row = await conn.fetchrow(
""" """
INSERT INTO l2_shares (content_hash, actor_id, l2_server, asset_name, activity_id, content_type, last_synced_at) INSERT INTO l2_shares (cid, actor_id, l2_server, asset_name, activity_id, content_type, last_synced_at)
VALUES ($1, $2, $3, $4, $5, $6, NOW()) VALUES ($1, $2, $3, $4, $5, $6, NOW())
ON CONFLICT (content_hash, actor_id, l2_server, content_type) DO UPDATE SET ON CONFLICT (cid, actor_id, l2_server, content_type) DO UPDATE SET
asset_name = EXCLUDED.asset_name, asset_name = EXCLUDED.asset_name,
activity_id = COALESCE(EXCLUDED.activity_id, l2_shares.activity_id), activity_id = COALESCE(EXCLUDED.activity_id, l2_shares.activity_id),
last_synced_at = NOW() last_synced_at = NOW()
RETURNING l2_server, asset_name, activity_id, content_type, published_at, last_synced_at RETURNING l2_server, asset_name, activity_id, content_type, published_at, last_synced_at
""", """,
content_hash, actor_id, l2_server, asset_name, activity_id, content_type cid, actor_id, l2_server, asset_name, activity_id, content_type
) )
return { return {
"l2_server": row["l2_server"], "l2_server": row["l2_server"],
@@ -1000,19 +1000,19 @@ async def save_l2_share(
async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit: int = 100, offset: int = 0) -> List[dict]: async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit: int = 100, offset: int = 0) -> List[dict]:
"""Get all items for a user, optionally filtered by type. Deduplicates by content_hash.""" """Get all items for a user, optionally filtered by type. Deduplicates by cid."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
if item_type: if item_type:
rows = await conn.fetch( rows = await conn.fetch(
""" """
SELECT * FROM ( SELECT * FROM (
SELECT DISTINCT ON (it.content_hash) SELECT DISTINCT ON (it.cid)
it.content_hash, it.type, it.description, it.filename, it.pinned, it.created_at, it.cid, it.type, it.description, it.filename, it.pinned, it.created_at,
ci.ipfs_cid ci.ipfs_cid
FROM item_types it FROM item_types it
JOIN cache_items ci ON it.content_hash = ci.content_hash JOIN cache_items ci ON it.cid = ci.cid
WHERE it.actor_id = $1 AND it.type = $2 WHERE it.actor_id = $1 AND it.type = $2
ORDER BY it.content_hash, it.created_at DESC ORDER BY it.cid, it.created_at DESC
) deduped ) deduped
ORDER BY created_at DESC ORDER BY created_at DESC
LIMIT $3 OFFSET $4 LIMIT $3 OFFSET $4
@@ -1023,13 +1023,13 @@ async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit:
rows = await conn.fetch( rows = await conn.fetch(
""" """
SELECT * FROM ( SELECT * FROM (
SELECT DISTINCT ON (it.content_hash) SELECT DISTINCT ON (it.cid)
it.content_hash, it.type, it.description, it.filename, it.pinned, it.created_at, it.cid, it.type, it.description, it.filename, it.pinned, it.created_at,
ci.ipfs_cid ci.ipfs_cid
FROM item_types it FROM item_types it
JOIN cache_items ci ON it.content_hash = ci.content_hash JOIN cache_items ci ON it.cid = ci.cid
WHERE it.actor_id = $1 WHERE it.actor_id = $1
ORDER BY it.content_hash, it.created_at DESC ORDER BY it.cid, it.created_at DESC
) deduped ) deduped
ORDER BY created_at DESC ORDER BY created_at DESC
LIMIT $2 OFFSET $3 LIMIT $2 OFFSET $3
@@ -1039,7 +1039,7 @@ async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit:
return [ return [
{ {
"content_hash": r["content_hash"], "cid": r["cid"],
"type": r["type"], "type": r["type"],
"description": r["description"], "description": r["description"],
"filename": r["filename"], "filename": r["filename"],
@@ -1052,16 +1052,16 @@ async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit:
async def count_user_items(actor_id: str, item_type: Optional[str] = None) -> int: async def count_user_items(actor_id: str, item_type: Optional[str] = None) -> int:
"""Count unique items (by content_hash) for a user.""" """Count unique items (by cid) for a user."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
if item_type: if item_type:
return await conn.fetchval( return await conn.fetchval(
"SELECT COUNT(DISTINCT content_hash) FROM item_types WHERE actor_id = $1 AND type = $2", "SELECT COUNT(DISTINCT cid) FROM item_types WHERE actor_id = $1 AND type = $2",
actor_id, item_type actor_id, item_type
) )
else: else:
return await conn.fetchval( return await conn.fetchval(
"SELECT COUNT(DISTINCT content_hash) FROM item_types WHERE actor_id = $1", "SELECT COUNT(DISTINCT cid) FROM item_types WHERE actor_id = $1",
actor_id actor_id
) )
@@ -1073,7 +1073,7 @@ async def get_run_cache(run_id: str) -> Optional[dict]:
async with pool.acquire() as conn: async with pool.acquire() as conn:
row = await conn.fetchrow( row = await conn.fetchrow(
""" """
SELECT run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at SELECT run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
FROM run_cache WHERE run_id = $1 FROM run_cache WHERE run_id = $1
""", """,
run_id run_id
@@ -1081,7 +1081,7 @@ async def get_run_cache(run_id: str) -> Optional[dict]:
if row: if row:
return { return {
"run_id": row["run_id"], "run_id": row["run_id"],
"output_hash": row["output_hash"], "output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"], "ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"], "provenance_cid": row["provenance_cid"],
"recipe": row["recipe"], "recipe": row["recipe"],
@@ -1094,7 +1094,7 @@ async def get_run_cache(run_id: str) -> Optional[dict]:
async def save_run_cache( async def save_run_cache(
run_id: str, run_id: str,
output_hash: str, output_cid: str,
recipe: str, recipe: str,
inputs: List[str], inputs: List[str],
ipfs_cid: Optional[str] = None, ipfs_cid: Optional[str] = None,
@@ -1105,19 +1105,19 @@ async def save_run_cache(
async with pool.acquire() as conn: async with pool.acquire() as conn:
row = await conn.fetchrow( row = await conn.fetchrow(
""" """
INSERT INTO run_cache (run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id) INSERT INTO run_cache (run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id)
VALUES ($1, $2, $3, $4, $5, $6, $7) VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (run_id) DO UPDATE SET ON CONFLICT (run_id) DO UPDATE SET
output_hash = EXCLUDED.output_hash, output_cid = EXCLUDED.output_cid,
ipfs_cid = COALESCE(EXCLUDED.ipfs_cid, run_cache.ipfs_cid), ipfs_cid = COALESCE(EXCLUDED.ipfs_cid, run_cache.ipfs_cid),
provenance_cid = COALESCE(EXCLUDED.provenance_cid, run_cache.provenance_cid) provenance_cid = COALESCE(EXCLUDED.provenance_cid, run_cache.provenance_cid)
RETURNING run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at RETURNING run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
""", """,
run_id, output_hash, ipfs_cid, provenance_cid, recipe, _json.dumps(inputs), actor_id run_id, output_cid, ipfs_cid, provenance_cid, recipe, _json.dumps(inputs), actor_id
) )
return { return {
"run_id": row["run_id"], "run_id": row["run_id"],
"output_hash": row["output_hash"], "output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"], "ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"], "provenance_cid": row["provenance_cid"],
"recipe": row["recipe"], "recipe": row["recipe"],
@@ -1127,20 +1127,20 @@ async def save_run_cache(
} }
async def get_run_by_output(output_hash: str) -> Optional[dict]: async def get_run_by_output(output_cid: str) -> Optional[dict]:
"""Get run cache entry by output hash.""" """Get run cache entry by output hash."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
row = await conn.fetchrow( row = await conn.fetchrow(
""" """
SELECT run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at SELECT run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
FROM run_cache WHERE output_hash = $1 FROM run_cache WHERE output_cid = $1
""", """,
output_hash output_cid
) )
if row: if row:
return { return {
"run_id": row["run_id"], "run_id": row["run_id"],
"output_hash": row["output_hash"], "output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"], "ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"], "provenance_cid": row["provenance_cid"],
"recipe": row["recipe"], "recipe": row["recipe"],
@@ -1173,7 +1173,7 @@ async def list_runs_by_actor(actor_id: str, offset: int = 0, limit: int = 20) ->
async with pool.acquire() as conn: async with pool.acquire() as conn:
rows = await conn.fetch( rows = await conn.fetch(
""" """
SELECT run_id, output_hash, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at SELECT run_id, output_cid, ipfs_cid, provenance_cid, recipe, inputs, actor_id, created_at
FROM run_cache FROM run_cache
WHERE actor_id = $1 WHERE actor_id = $1
ORDER BY created_at DESC ORDER BY created_at DESC
@@ -1184,7 +1184,7 @@ async def list_runs_by_actor(actor_id: str, offset: int = 0, limit: int = 20) ->
return [ return [
{ {
"run_id": row["run_id"], "run_id": row["run_id"],
"output_hash": row["output_hash"], "output_cid": row["output_cid"],
"ipfs_cid": row["ipfs_cid"], "ipfs_cid": row["ipfs_cid"],
"provenance_cid": row["provenance_cid"], "provenance_cid": row["provenance_cid"],
"recipe": row["recipe"], "recipe": row["recipe"],
@@ -1348,7 +1348,7 @@ async def get_all_active_storage() -> List[dict]:
async def add_storage_pin( async def add_storage_pin(
content_hash: str, cid: str,
storage_id: int, storage_id: int,
ipfs_cid: Optional[str], ipfs_cid: Optional[str],
pin_type: str, pin_type: str,
@@ -1358,40 +1358,40 @@ async def add_storage_pin(
async with pool.acquire() as conn: async with pool.acquire() as conn:
try: try:
row = await conn.fetchrow( row = await conn.fetchrow(
"""INSERT INTO storage_pins (content_hash, storage_id, ipfs_cid, pin_type, size_bytes) """INSERT INTO storage_pins (cid, storage_id, ipfs_cid, pin_type, size_bytes)
VALUES ($1, $2, $3, $4, $5) VALUES ($1, $2, $3, $4, $5)
ON CONFLICT (content_hash, storage_id) DO UPDATE SET ON CONFLICT (cid, storage_id) DO UPDATE SET
ipfs_cid = EXCLUDED.ipfs_cid, ipfs_cid = EXCLUDED.ipfs_cid,
pin_type = EXCLUDED.pin_type, pin_type = EXCLUDED.pin_type,
size_bytes = EXCLUDED.size_bytes, size_bytes = EXCLUDED.size_bytes,
pinned_at = NOW() pinned_at = NOW()
RETURNING id""", RETURNING id""",
content_hash, storage_id, ipfs_cid, pin_type, size_bytes cid, storage_id, ipfs_cid, pin_type, size_bytes
) )
return row["id"] if row else None return row["id"] if row else None
except Exception: except Exception:
return None return None
async def remove_storage_pin(content_hash: str, storage_id: int) -> bool: async def remove_storage_pin(cid: str, storage_id: int) -> bool:
"""Remove a pin record.""" """Remove a pin record."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
result = await conn.execute( result = await conn.execute(
"DELETE FROM storage_pins WHERE content_hash = $1 AND storage_id = $2", "DELETE FROM storage_pins WHERE cid = $1 AND storage_id = $2",
content_hash, storage_id cid, storage_id
) )
return "DELETE 1" in result return "DELETE 1" in result
async def get_pins_for_content(content_hash: str) -> List[dict]: async def get_pins_for_content(cid: str) -> List[dict]:
"""Get all storage locations where content is pinned.""" """Get all storage locations where content is pinned."""
async with pool.acquire() as conn: async with pool.acquire() as conn:
rows = await conn.fetch( rows = await conn.fetch(
"""SELECT sp.*, sb.provider_type, sb.provider_name, sb.actor_id """SELECT sp.*, sb.provider_type, sb.provider_name, sb.actor_id
FROM storage_pins sp FROM storage_pins sp
JOIN storage_backends sb ON sp.storage_id = sb.id JOIN storage_backends sb ON sp.storage_id = sb.id
WHERE sp.content_hash = $1""", WHERE sp.cid = $1""",
content_hash cid
) )
return [dict(row) for row in rows] return [dict(row) for row in rows]

View File

@@ -120,21 +120,21 @@ class SourceExecutor(Executor):
"""Executor for SOURCE nodes - loads content from cache by hash.""" """Executor for SOURCE nodes - loads content from cache by hash."""
def execute(self, config: Dict, inputs: List[Path], output_path: Path) -> Path: def execute(self, config: Dict, inputs: List[Path], output_path: Path) -> Path:
# Source nodes load from cache by content_hash # Source nodes load from cache by cid
content_hash = config.get("content_hash") cid = config.get("cid")
if not content_hash: if not cid:
raise ValueError("SOURCE node requires content_hash in config") raise ValueError("SOURCE node requires cid in config")
# Look up in cache # Look up in cache
source_path = CACHE_DIR / content_hash source_path = CACHE_DIR / cid
if not source_path.exists(): if not source_path.exists():
# Try nodes directory # Try nodes directory
from cache_manager import get_cache_manager from cache_manager import get_cache_manager
cache_manager = get_cache_manager() cache_manager = get_cache_manager()
source_path = cache_manager.get_by_content_hash(content_hash) source_path = cache_manager.get_by_cid(cid)
if not source_path or not source_path.exists(): if not source_path or not source_path.exists():
raise ValueError(f"Source content not in cache: {content_hash}") raise ValueError(f"Source content not in cache: {cid}")
# For source nodes, we just return the path (no transformation) # For source nodes, we just return the path (no transformation)
# The engine will use this as input to subsequent nodes # The engine will use this as input to subsequent nodes
@@ -186,7 +186,7 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
# Input comes from cache by hash (supports both legacy and new cache locations) # Input comes from cache by hash (supports both legacy and new cache locations)
cache_manager = get_cache_manager() cache_manager = get_cache_manager()
input_path = cache_manager.get_by_content_hash(input_hash) input_path = cache_manager.get_by_cid(input_hash)
if not input_path or not input_path.exists(): if not input_path or not input_path.exists():
raise ValueError(f"Input not in cache: {input_hash}") raise ValueError(f"Input not in cache: {input_hash}")
@@ -214,9 +214,9 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
raise ValueError(f"Unknown effect: {effect_name}") raise ValueError(f"Unknown effect: {effect_name}")
# Verify output # Verify output
output_hash = file_hash(result) output_cid = file_hash(result)
if output_hash != expected_hash: if output_cid != expected_hash:
raise ValueError(f"Output hash mismatch: expected {expected_hash}, got {output_hash}") raise ValueError(f"Output hash mismatch: expected {expected_hash}, got {output_cid}")
# Build effect info based on source # Build effect info based on source
if effect_name == "identity": if effect_name == "identity":
@@ -224,7 +224,7 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
artdag_commit = get_artdag_commit() artdag_commit = get_artdag_commit()
effect_info = { effect_info = {
"name": f"effect:{effect_name}", "name": f"effect:{effect_name}",
"content_hash": REGISTRY[f"effect:{effect_name}"]["hash"], "cid": REGISTRY[f"effect:{effect_name}"]["hash"],
"repo": "github", "repo": "github",
"repo_commit": artdag_commit, "repo_commit": artdag_commit,
"repo_url": f"https://github.com/gilesbradshaw/art-dag/blob/{artdag_commit}/artdag/nodes/effect.py" "repo_url": f"https://github.com/gilesbradshaw/art-dag/blob/{artdag_commit}/artdag/nodes/effect.py"
@@ -234,7 +234,7 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
effects_commit = get_effects_commit() effects_commit = get_effects_commit()
effect_info = { effect_info = {
"name": f"effect:{effect_name}", "name": f"effect:{effect_name}",
"content_hash": REGISTRY[f"effect:{effect_name}"]["hash"], "cid": REGISTRY[f"effect:{effect_name}"]["hash"],
"repo": "rose-ash", "repo": "rose-ash",
"repo_commit": effects_commit, "repo_commit": effects_commit,
"repo_url": f"https://git.rose-ash.com/art-dag/effects/src/commit/{effects_commit}/{effect_name}" "repo_url": f"https://git.rose-ash.com/art-dag/effects/src/commit/{effects_commit}/{effect_name}"
@@ -247,15 +247,15 @@ def render_effect(self, input_hash: str, effect_name: str, output_name: str) ->
"rendered_by": "@giles@artdag.rose-ash.com", "rendered_by": "@giles@artdag.rose-ash.com",
"output": { "output": {
"name": output_name, "name": output_name,
"content_hash": output_hash, "cid": output_cid,
}, },
"inputs": [ "inputs": [
{"content_hash": input_hash} {"cid": input_hash}
], ],
"effects": [effect_info], "effects": [effect_info],
"infrastructure": { "infrastructure": {
"software": {"name": "infra:artdag", "content_hash": REGISTRY["infra:artdag"]["hash"]}, "software": {"name": "infra:artdag", "cid": REGISTRY["infra:artdag"]["hash"]},
"hardware": {"name": "infra:giles-hp", "content_hash": REGISTRY["infra:giles-hp"]["hash"]} "hardware": {"name": "infra:giles-hp", "cid": REGISTRY["infra:giles-hp"]["hash"]}
} }
} }
@@ -329,10 +329,10 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
if not result.success: if not result.success:
raise RuntimeError(f"DAG execution failed: {result.error}") raise RuntimeError(f"DAG execution failed: {result.error}")
# Index all node outputs by content_hash and upload to IPFS # Index all node outputs by cid and upload to IPFS
cache_manager = get_cache_manager() cache_manager = get_cache_manager()
output_hash = None output_cid = None
node_hashes = {} # node_id -> content_hash mapping node_hashes = {} # node_id -> cid mapping
node_ipfs_cids = {} # node_id -> ipfs_cid mapping node_ipfs_cids = {} # node_id -> ipfs_cid mapping
# Process all node results (intermediates + output) # Process all node results (intermediates + output)
@@ -341,9 +341,9 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
node = dag.nodes.get(node_id) node = dag.nodes.get(node_id)
# Skip SOURCE nodes - they're already in cache # Skip SOURCE nodes - they're already in cache
if node and (node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE"): if node and (node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE"):
content_hash = node.config.get("content_hash") cid = node.config.get("cid")
if content_hash: if cid:
node_hashes[node_id] = content_hash node_hashes[node_id] = cid
continue continue
# Determine node type for cache metadata # Determine node type for cache metadata
@@ -353,20 +353,20 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
else: else:
cache_node_type = "dag_intermediate" cache_node_type = "dag_intermediate"
# Store in cache_manager (indexes by content_hash, uploads to IPFS) # Store in cache_manager (indexes by cid, uploads to IPFS)
cached, ipfs_cid = cache_manager.put( cached, ipfs_cid = cache_manager.put(
Path(node_path), Path(node_path),
node_type=cache_node_type, node_type=cache_node_type,
node_id=node_id, node_id=node_id,
) )
node_hashes[node_id] = cached.content_hash node_hashes[node_id] = cached.cid
if ipfs_cid: if ipfs_cid:
node_ipfs_cids[node_id] = ipfs_cid node_ipfs_cids[node_id] = ipfs_cid
logger.info(f"Cached node {node_id}: {cached.content_hash[:16]}... -> {ipfs_cid or 'no IPFS'}") logger.info(f"Cached node {node_id}: {cached.cid[:16]}... -> {ipfs_cid or 'no IPFS'}")
# Get output hash from the output node # Get output hash from the output node
if result.output_path and result.output_path.exists(): if result.output_path and result.output_path.exists():
output_hash = file_hash(result.output_path) output_cid = file_hash(result.output_path)
output_ipfs_cid = node_ipfs_cids.get(dag.output_id) output_ipfs_cid = node_ipfs_cids.get(dag.output_id)
# Store output in database (for L2 to query IPFS CID) # Store output in database (for L2 to query IPFS CID)
@@ -376,14 +376,14 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
async def save_to_db(): async def save_to_db():
if database.pool is None: if database.pool is None:
await database.init_db() await database.init_db()
await database.create_cache_item(output_hash, output_ipfs_cid) await database.create_cache_item(output_cid, output_ipfs_cid)
# Also save the run result # Also save the run result
if run_id: if run_id:
input_hashes_for_db = [ input_hashes_for_db = [
node.config.get("content_hash") node.config.get("cid")
for node in dag.nodes.values() for node in dag.nodes.values()
if (node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE") if (node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE")
and node.config.get("content_hash") and node.config.get("cid")
] ]
# Get actor_id and recipe from pending_runs (saved when run started) # Get actor_id and recipe from pending_runs (saved when run started)
actor_id = None actor_id = None
@@ -395,7 +395,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
await database.save_run_cache( await database.save_run_cache(
run_id=run_id, run_id=run_id,
output_hash=output_hash, output_cid=output_cid,
recipe=recipe_name, recipe=recipe_name,
inputs=input_hashes_for_db, inputs=input_hashes_for_db,
ipfs_cid=output_ipfs_cid, ipfs_cid=output_ipfs_cid,
@@ -405,7 +405,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
# Save output as media for the user # Save output as media for the user
if actor_id: if actor_id:
await database.save_item_metadata( await database.save_item_metadata(
content_hash=output_hash, cid=output_cid,
actor_id=actor_id, actor_id=actor_id,
item_type="media", item_type="media",
description=f"Output from recipe: {recipe_name}", description=f"Output from recipe: {recipe_name}",
@@ -431,9 +431,9 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
intermediate_hashes = [] intermediate_hashes = []
for node_id, node in dag.nodes.items(): for node_id, node in dag.nodes.items():
if node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE": if node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE":
content_hash = node.config.get("content_hash") cid = node.config.get("cid")
if content_hash: if cid:
input_hashes.append(content_hash) input_hashes.append(cid)
elif node_id != dag.output_id and node_id in node_hashes: elif node_id != dag.output_id and node_id in node_hashes:
intermediate_hashes.append(node_hashes[node_id]) intermediate_hashes.append(node_hashes[node_id])
@@ -441,9 +441,9 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
from artdag.activities import Activity from artdag.activities import Activity
from datetime import datetime, timezone from datetime import datetime, timezone
activity = Activity( activity = Activity(
activity_id=run_id or f"dag-{output_hash[:16]}", activity_id=run_id or f"dag-{output_cid[:16]}",
input_ids=sorted(input_hashes), input_ids=sorted(input_hashes),
output_id=output_hash, output_id=output_cid,
intermediate_ids=intermediate_hashes, intermediate_ids=intermediate_hashes,
created_at=datetime.now(timezone.utc).timestamp(), created_at=datetime.now(timezone.utc).timestamp(),
status="completed", status="completed",
@@ -454,23 +454,23 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
input_hashes_for_provenance = [] input_hashes_for_provenance = []
for node_id, node in dag.nodes.items(): for node_id, node in dag.nodes.items():
if node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE": if node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE":
content_hash = node.config.get("content_hash") cid = node.config.get("cid")
if content_hash: if cid:
input_hashes_for_provenance.append({"content_hash": content_hash}) input_hashes_for_provenance.append({"cid": cid})
provenance = { provenance = {
"task_id": self.request.id, "task_id": self.request.id,
"run_id": run_id, "run_id": run_id,
"rendered_at": datetime.now(timezone.utc).isoformat(), "rendered_at": datetime.now(timezone.utc).isoformat(),
"output": { "output": {
"content_hash": output_hash, "cid": output_cid,
"ipfs_cid": node_ipfs_cids.get(dag.output_id) if dag.output_id else None, "ipfs_cid": node_ipfs_cids.get(dag.output_id) if dag.output_id else None,
}, },
"inputs": input_hashes_for_provenance, "inputs": input_hashes_for_provenance,
"dag": dag_json, # Full DAG definition "dag": dag_json, # Full DAG definition
"nodes": { "nodes": {
node_id: { node_id: {
"content_hash": node_hashes.get(node_id), "cid": node_hashes.get(node_id),
"ipfs_cid": node_ipfs_cids.get(node_id), "ipfs_cid": node_ipfs_cids.get(node_id),
} }
for node_id in dag.nodes.keys() for node_id in dag.nodes.keys()
@@ -496,7 +496,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
return { return {
"success": True, "success": True,
"run_id": run_id, "run_id": run_id,
"output_hash": output_hash, "output_cid": output_cid,
"output_ipfs_cid": node_ipfs_cids.get(dag.output_id) if dag.output_id else None, "output_ipfs_cid": node_ipfs_cids.get(dag.output_id) if dag.output_id else None,
"output_path": str(result.output_path) if result.output_path else None, "output_path": str(result.output_path) if result.output_path else None,
"execution_time": result.execution_time, "execution_time": result.execution_time,
@@ -505,7 +505,7 @@ def execute_dag(self, dag_json: str, run_id: str = None) -> dict:
"node_results": { "node_results": {
node_id: str(path) for node_id, path in result.node_results.items() node_id: str(path) for node_id, path in result.node_results.items()
}, },
"node_hashes": node_hashes, # node_id -> content_hash "node_hashes": node_hashes, # node_id -> cid
"node_ipfs_cids": node_ipfs_cids, # node_id -> ipfs_cid "node_ipfs_cids": node_ipfs_cids, # node_id -> ipfs_cid
"provenance_cid": provenance_cid, "provenance_cid": provenance_cid,
} }
@@ -526,10 +526,10 @@ def build_effect_dag(input_hashes: List[str], effect_name: str) -> DAG:
# Add source nodes for each input # Add source nodes for each input
source_ids = [] source_ids = []
for i, content_hash in enumerate(input_hashes): for i, cid in enumerate(input_hashes):
source_node = Node( source_node = Node(
node_type=NodeType.SOURCE, node_type=NodeType.SOURCE,
config={"content_hash": content_hash}, config={"cid": cid},
name=f"source_{i}", name=f"source_{i}",
) )
dag.add_node(source_node) dag.add_node(source_node)

File diff suppressed because it is too large Load Diff

View File

@@ -27,12 +27,12 @@ class StorageProvider(ABC):
provider_type: str = "unknown" provider_type: str = "unknown"
@abstractmethod @abstractmethod
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]: async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
""" """
Pin content to storage. Pin content to storage.
Args: Args:
content_hash: SHA3-256 hash of the content cid: SHA3-256 hash of the content
data: Raw bytes to store data: Raw bytes to store
filename: Optional filename hint filename: Optional filename hint
@@ -42,12 +42,12 @@ class StorageProvider(ABC):
pass pass
@abstractmethod @abstractmethod
async def unpin(self, content_hash: str) -> bool: async def unpin(self, cid: str) -> bool:
""" """
Unpin content from storage. Unpin content from storage.
Args: Args:
content_hash: SHA3-256 hash of the content cid: SHA3-256 hash of the content
Returns: Returns:
True if unpinned successfully True if unpinned successfully
@@ -55,12 +55,12 @@ class StorageProvider(ABC):
pass pass
@abstractmethod @abstractmethod
async def get(self, content_hash: str) -> Optional[bytes]: async def get(self, cid: str) -> Optional[bytes]:
""" """
Retrieve content from storage. Retrieve content from storage.
Args: Args:
content_hash: SHA3-256 hash of the content cid: SHA3-256 hash of the content
Returns: Returns:
Raw bytes or None if not found Raw bytes or None if not found
@@ -68,7 +68,7 @@ class StorageProvider(ABC):
pass pass
@abstractmethod @abstractmethod
async def is_pinned(self, content_hash: str) -> bool: async def is_pinned(self, cid: str) -> bool:
"""Check if content is pinned in this storage.""" """Check if content is pinned in this storage."""
pass pass
@@ -111,16 +111,16 @@ class PinataProvider(StorageProvider):
"pinata_secret_api_key": self.secret_key, "pinata_secret_api_key": self.secret_key,
} }
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]: async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Pin content to Pinata.""" """Pin content to Pinata."""
try: try:
import asyncio import asyncio
def do_pin(): def do_pin():
files = {"file": (filename or f"{content_hash[:16]}.bin", data)} files = {"file": (filename or f"{cid[:16]}.bin", data)}
metadata = { metadata = {
"name": filename or content_hash[:16], "name": filename or cid[:16],
"keyvalues": {"content_hash": content_hash} "keyvalues": {"cid": cid}
} }
response = requests.post( response = requests.post(
f"{self.base_url}/pinning/pinFileToIPFS", f"{self.base_url}/pinning/pinFileToIPFS",
@@ -133,22 +133,22 @@ class PinataProvider(StorageProvider):
return response.json().get("IpfsHash") return response.json().get("IpfsHash")
cid = await asyncio.to_thread(do_pin) cid = await asyncio.to_thread(do_pin)
logger.info(f"Pinata: Pinned {content_hash[:16]}... as {cid}") logger.info(f"Pinata: Pinned {cid[:16]}... as {cid}")
return cid return cid
except Exception as e: except Exception as e:
logger.error(f"Pinata pin failed: {e}") logger.error(f"Pinata pin failed: {e}")
return None return None
async def unpin(self, content_hash: str) -> bool: async def unpin(self, cid: str) -> bool:
"""Unpin content from Pinata by finding its CID first.""" """Unpin content from Pinata by finding its CID first."""
try: try:
import asyncio import asyncio
def do_unpin(): def do_unpin():
# First find the pin by content_hash metadata # First find the pin by cid metadata
response = requests.get( response = requests.get(
f"{self.base_url}/data/pinList", f"{self.base_url}/data/pinList",
params={"metadata[keyvalues][content_hash]": content_hash, "status": "pinned"}, params={"metadata[keyvalues][cid]": cid, "status": "pinned"},
headers=self._headers(), headers=self._headers(),
timeout=30 timeout=30
) )
@@ -171,13 +171,13 @@ class PinataProvider(StorageProvider):
return True return True
result = await asyncio.to_thread(do_unpin) result = await asyncio.to_thread(do_unpin)
logger.info(f"Pinata: Unpinned {content_hash[:16]}...") logger.info(f"Pinata: Unpinned {cid[:16]}...")
return result return result
except Exception as e: except Exception as e:
logger.error(f"Pinata unpin failed: {e}") logger.error(f"Pinata unpin failed: {e}")
return False return False
async def get(self, content_hash: str) -> Optional[bytes]: async def get(self, cid: str) -> Optional[bytes]:
"""Get content from Pinata via IPFS gateway.""" """Get content from Pinata via IPFS gateway."""
try: try:
import asyncio import asyncio
@@ -186,7 +186,7 @@ class PinataProvider(StorageProvider):
# First find the CID # First find the CID
response = requests.get( response = requests.get(
f"{self.base_url}/data/pinList", f"{self.base_url}/data/pinList",
params={"metadata[keyvalues][content_hash]": content_hash, "status": "pinned"}, params={"metadata[keyvalues][cid]": cid, "status": "pinned"},
headers=self._headers(), headers=self._headers(),
timeout=30 timeout=30
) )
@@ -213,7 +213,7 @@ class PinataProvider(StorageProvider):
logger.error(f"Pinata get failed: {e}") logger.error(f"Pinata get failed: {e}")
return None return None
async def is_pinned(self, content_hash: str) -> bool: async def is_pinned(self, cid: str) -> bool:
"""Check if content is pinned on Pinata.""" """Check if content is pinned on Pinata."""
try: try:
import asyncio import asyncio
@@ -221,7 +221,7 @@ class PinataProvider(StorageProvider):
def do_check(): def do_check():
response = requests.get( response = requests.get(
f"{self.base_url}/data/pinList", f"{self.base_url}/data/pinList",
params={"metadata[keyvalues][content_hash]": content_hash, "status": "pinned"}, params={"metadata[keyvalues][cid]": cid, "status": "pinned"},
headers=self._headers(), headers=self._headers(),
timeout=30 timeout=30
) )
@@ -286,7 +286,7 @@ class Web3StorageProvider(StorageProvider):
def _headers(self) -> dict: def _headers(self) -> dict:
return {"Authorization": f"Bearer {self.api_token}"} return {"Authorization": f"Bearer {self.api_token}"}
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]: async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Pin content to web3.storage.""" """Pin content to web3.storage."""
try: try:
import asyncio import asyncio
@@ -297,7 +297,7 @@ class Web3StorageProvider(StorageProvider):
data=data, data=data,
headers={ headers={
**self._headers(), **self._headers(),
"X-Name": filename or content_hash[:16] "X-Name": filename or cid[:16]
}, },
timeout=120 timeout=120
) )
@@ -305,24 +305,24 @@ class Web3StorageProvider(StorageProvider):
return response.json().get("cid") return response.json().get("cid")
cid = await asyncio.to_thread(do_pin) cid = await asyncio.to_thread(do_pin)
logger.info(f"web3.storage: Pinned {content_hash[:16]}... as {cid}") logger.info(f"web3.storage: Pinned {cid[:16]}... as {cid}")
return cid return cid
except Exception as e: except Exception as e:
logger.error(f"web3.storage pin failed: {e}") logger.error(f"web3.storage pin failed: {e}")
return None return None
async def unpin(self, content_hash: str) -> bool: async def unpin(self, cid: str) -> bool:
"""web3.storage doesn't support unpinning - data is stored permanently.""" """web3.storage doesn't support unpinning - data is stored permanently."""
logger.warning("web3.storage: Unpinning not supported (permanent storage)") logger.warning("web3.storage: Unpinning not supported (permanent storage)")
return False return False
async def get(self, content_hash: str) -> Optional[bytes]: async def get(self, cid: str) -> Optional[bytes]:
"""Get content from web3.storage - would need CID mapping.""" """Get content from web3.storage - would need CID mapping."""
# web3.storage requires knowing the CID to fetch # web3.storage requires knowing the CID to fetch
# For now, return None - we'd need to maintain a mapping # For now, return None - we'd need to maintain a mapping
return None return None
async def is_pinned(self, content_hash: str) -> bool: async def is_pinned(self, cid: str) -> bool:
"""Check if content is pinned - would need CID mapping.""" """Check if content is pinned - would need CID mapping."""
return False return False
@@ -383,7 +383,7 @@ class NFTStorageProvider(StorageProvider):
def _headers(self) -> dict: def _headers(self) -> dict:
return {"Authorization": f"Bearer {self.api_token}"} return {"Authorization": f"Bearer {self.api_token}"}
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]: async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Pin content to NFT.Storage.""" """Pin content to NFT.Storage."""
try: try:
import asyncio import asyncio
@@ -399,22 +399,22 @@ class NFTStorageProvider(StorageProvider):
return response.json().get("value", {}).get("cid") return response.json().get("value", {}).get("cid")
cid = await asyncio.to_thread(do_pin) cid = await asyncio.to_thread(do_pin)
logger.info(f"NFT.Storage: Pinned {content_hash[:16]}... as {cid}") logger.info(f"NFT.Storage: Pinned {cid[:16]}... as {cid}")
return cid return cid
except Exception as e: except Exception as e:
logger.error(f"NFT.Storage pin failed: {e}") logger.error(f"NFT.Storage pin failed: {e}")
return None return None
async def unpin(self, content_hash: str) -> bool: async def unpin(self, cid: str) -> bool:
"""NFT.Storage doesn't support unpinning - data is stored permanently.""" """NFT.Storage doesn't support unpinning - data is stored permanently."""
logger.warning("NFT.Storage: Unpinning not supported (permanent storage)") logger.warning("NFT.Storage: Unpinning not supported (permanent storage)")
return False return False
async def get(self, content_hash: str) -> Optional[bytes]: async def get(self, cid: str) -> Optional[bytes]:
"""Get content from NFT.Storage - would need CID mapping.""" """Get content from NFT.Storage - would need CID mapping."""
return None return None
async def is_pinned(self, content_hash: str) -> bool: async def is_pinned(self, cid: str) -> bool:
"""Check if content is pinned - would need CID mapping.""" """Check if content is pinned - would need CID mapping."""
return False return False
@@ -459,13 +459,13 @@ class InfuraIPFSProvider(StorageProvider):
def _auth(self) -> tuple: def _auth(self) -> tuple:
return (self.project_id, self.project_secret) return (self.project_id, self.project_secret)
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]: async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Pin content to Infura IPFS.""" """Pin content to Infura IPFS."""
try: try:
import asyncio import asyncio
def do_pin(): def do_pin():
files = {"file": (filename or f"{content_hash[:16]}.bin", data)} files = {"file": (filename or f"{cid[:16]}.bin", data)}
response = requests.post( response = requests.post(
f"{self.base_url}/add", f"{self.base_url}/add",
files=files, files=files,
@@ -476,13 +476,13 @@ class InfuraIPFSProvider(StorageProvider):
return response.json().get("Hash") return response.json().get("Hash")
cid = await asyncio.to_thread(do_pin) cid = await asyncio.to_thread(do_pin)
logger.info(f"Infura IPFS: Pinned {content_hash[:16]}... as {cid}") logger.info(f"Infura IPFS: Pinned {cid[:16]}... as {cid}")
return cid return cid
except Exception as e: except Exception as e:
logger.error(f"Infura IPFS pin failed: {e}") logger.error(f"Infura IPFS pin failed: {e}")
return None return None
async def unpin(self, content_hash: str) -> bool: async def unpin(self, cid: str) -> bool:
"""Unpin content from Infura IPFS.""" """Unpin content from Infura IPFS."""
try: try:
import asyncio import asyncio
@@ -490,7 +490,7 @@ class InfuraIPFSProvider(StorageProvider):
def do_unpin(): def do_unpin():
response = requests.post( response = requests.post(
f"{self.base_url}/pin/rm", f"{self.base_url}/pin/rm",
params={"arg": content_hash}, params={"arg": cid},
auth=self._auth(), auth=self._auth(),
timeout=30 timeout=30
) )
@@ -502,7 +502,7 @@ class InfuraIPFSProvider(StorageProvider):
logger.error(f"Infura IPFS unpin failed: {e}") logger.error(f"Infura IPFS unpin failed: {e}")
return False return False
async def get(self, content_hash: str) -> Optional[bytes]: async def get(self, cid: str) -> Optional[bytes]:
"""Get content from Infura IPFS gateway.""" """Get content from Infura IPFS gateway."""
try: try:
import asyncio import asyncio
@@ -510,7 +510,7 @@ class InfuraIPFSProvider(StorageProvider):
def do_get(): def do_get():
response = requests.post( response = requests.post(
f"{self.base_url}/cat", f"{self.base_url}/cat",
params={"arg": content_hash}, params={"arg": cid},
auth=self._auth(), auth=self._auth(),
timeout=120 timeout=120
) )
@@ -522,7 +522,7 @@ class InfuraIPFSProvider(StorageProvider):
logger.error(f"Infura IPFS get failed: {e}") logger.error(f"Infura IPFS get failed: {e}")
return None return None
async def is_pinned(self, content_hash: str) -> bool: async def is_pinned(self, cid: str) -> bool:
"""Check if content is pinned on Infura IPFS.""" """Check if content is pinned on Infura IPFS."""
try: try:
import asyncio import asyncio
@@ -530,7 +530,7 @@ class InfuraIPFSProvider(StorageProvider):
def do_check(): def do_check():
response = requests.post( response = requests.post(
f"{self.base_url}/pin/ls", f"{self.base_url}/pin/ls",
params={"arg": content_hash}, params={"arg": cid},
auth=self._auth(), auth=self._auth(),
timeout=30 timeout=30
) )
@@ -579,7 +579,7 @@ class FilebaseProvider(StorageProvider):
self.capacity_bytes = capacity_gb * 1024**3 self.capacity_bytes = capacity_gb * 1024**3
self.endpoint = "https://s3.filebase.com" self.endpoint = "https://s3.filebase.com"
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]: async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Pin content to Filebase.""" """Pin content to Filebase."""
try: try:
import asyncio import asyncio
@@ -594,20 +594,20 @@ class FilebaseProvider(StorageProvider):
aws_secret_access_key=self.secret_key, aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4') config=Config(signature_version='s3v4')
) )
key = filename or f"{content_hash[:16]}.bin" key = filename or f"{cid[:16]}.bin"
s3.put_object(Bucket=self.bucket, Key=key, Body=data) s3.put_object(Bucket=self.bucket, Key=key, Body=data)
# Get CID from response headers # Get CID from response headers
head = s3.head_object(Bucket=self.bucket, Key=key) head = s3.head_object(Bucket=self.bucket, Key=key)
return head.get('Metadata', {}).get('cid', content_hash) return head.get('Metadata', {}).get('cid', cid)
cid = await asyncio.to_thread(do_pin) cid = await asyncio.to_thread(do_pin)
logger.info(f"Filebase: Pinned {content_hash[:16]}... as {cid}") logger.info(f"Filebase: Pinned {cid[:16]}... as {cid}")
return cid return cid
except Exception as e: except Exception as e:
logger.error(f"Filebase pin failed: {e}") logger.error(f"Filebase pin failed: {e}")
return None return None
async def unpin(self, content_hash: str) -> bool: async def unpin(self, cid: str) -> bool:
"""Remove content from Filebase.""" """Remove content from Filebase."""
try: try:
import asyncio import asyncio
@@ -622,7 +622,7 @@ class FilebaseProvider(StorageProvider):
aws_secret_access_key=self.secret_key, aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4') config=Config(signature_version='s3v4')
) )
s3.delete_object(Bucket=self.bucket, Key=content_hash) s3.delete_object(Bucket=self.bucket, Key=cid)
return True return True
return await asyncio.to_thread(do_unpin) return await asyncio.to_thread(do_unpin)
@@ -630,7 +630,7 @@ class FilebaseProvider(StorageProvider):
logger.error(f"Filebase unpin failed: {e}") logger.error(f"Filebase unpin failed: {e}")
return False return False
async def get(self, content_hash: str) -> Optional[bytes]: async def get(self, cid: str) -> Optional[bytes]:
"""Get content from Filebase.""" """Get content from Filebase."""
try: try:
import asyncio import asyncio
@@ -645,7 +645,7 @@ class FilebaseProvider(StorageProvider):
aws_secret_access_key=self.secret_key, aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4') config=Config(signature_version='s3v4')
) )
response = s3.get_object(Bucket=self.bucket, Key=content_hash) response = s3.get_object(Bucket=self.bucket, Key=cid)
return response['Body'].read() return response['Body'].read()
return await asyncio.to_thread(do_get) return await asyncio.to_thread(do_get)
@@ -653,7 +653,7 @@ class FilebaseProvider(StorageProvider):
logger.error(f"Filebase get failed: {e}") logger.error(f"Filebase get failed: {e}")
return None return None
async def is_pinned(self, content_hash: str) -> bool: async def is_pinned(self, cid: str) -> bool:
"""Check if content exists in Filebase.""" """Check if content exists in Filebase."""
try: try:
import asyncio import asyncio
@@ -668,7 +668,7 @@ class FilebaseProvider(StorageProvider):
aws_secret_access_key=self.secret_key, aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4') config=Config(signature_version='s3v4')
) )
s3.head_object(Bucket=self.bucket, Key=content_hash) s3.head_object(Bucket=self.bucket, Key=cid)
return True return True
return await asyncio.to_thread(do_check) return await asyncio.to_thread(do_check)
@@ -718,7 +718,7 @@ class StorjProvider(StorageProvider):
self.capacity_bytes = capacity_gb * 1024**3 self.capacity_bytes = capacity_gb * 1024**3
self.endpoint = "https://gateway.storjshare.io" self.endpoint = "https://gateway.storjshare.io"
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]: async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Store content on Storj.""" """Store content on Storj."""
try: try:
import asyncio import asyncio
@@ -733,18 +733,18 @@ class StorjProvider(StorageProvider):
aws_secret_access_key=self.secret_key, aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4') config=Config(signature_version='s3v4')
) )
key = filename or content_hash key = filename or cid
s3.put_object(Bucket=self.bucket, Key=key, Body=data) s3.put_object(Bucket=self.bucket, Key=key, Body=data)
return content_hash return cid
result = await asyncio.to_thread(do_pin) result = await asyncio.to_thread(do_pin)
logger.info(f"Storj: Stored {content_hash[:16]}...") logger.info(f"Storj: Stored {cid[:16]}...")
return result return result
except Exception as e: except Exception as e:
logger.error(f"Storj pin failed: {e}") logger.error(f"Storj pin failed: {e}")
return None return None
async def unpin(self, content_hash: str) -> bool: async def unpin(self, cid: str) -> bool:
"""Remove content from Storj.""" """Remove content from Storj."""
try: try:
import asyncio import asyncio
@@ -759,7 +759,7 @@ class StorjProvider(StorageProvider):
aws_secret_access_key=self.secret_key, aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4') config=Config(signature_version='s3v4')
) )
s3.delete_object(Bucket=self.bucket, Key=content_hash) s3.delete_object(Bucket=self.bucket, Key=cid)
return True return True
return await asyncio.to_thread(do_unpin) return await asyncio.to_thread(do_unpin)
@@ -767,7 +767,7 @@ class StorjProvider(StorageProvider):
logger.error(f"Storj unpin failed: {e}") logger.error(f"Storj unpin failed: {e}")
return False return False
async def get(self, content_hash: str) -> Optional[bytes]: async def get(self, cid: str) -> Optional[bytes]:
"""Get content from Storj.""" """Get content from Storj."""
try: try:
import asyncio import asyncio
@@ -782,7 +782,7 @@ class StorjProvider(StorageProvider):
aws_secret_access_key=self.secret_key, aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4') config=Config(signature_version='s3v4')
) )
response = s3.get_object(Bucket=self.bucket, Key=content_hash) response = s3.get_object(Bucket=self.bucket, Key=cid)
return response['Body'].read() return response['Body'].read()
return await asyncio.to_thread(do_get) return await asyncio.to_thread(do_get)
@@ -790,7 +790,7 @@ class StorjProvider(StorageProvider):
logger.error(f"Storj get failed: {e}") logger.error(f"Storj get failed: {e}")
return None return None
async def is_pinned(self, content_hash: str) -> bool: async def is_pinned(self, cid: str) -> bool:
"""Check if content exists on Storj.""" """Check if content exists on Storj."""
try: try:
import asyncio import asyncio
@@ -805,7 +805,7 @@ class StorjProvider(StorageProvider):
aws_secret_access_key=self.secret_key, aws_secret_access_key=self.secret_key,
config=Config(signature_version='s3v4') config=Config(signature_version='s3v4')
) )
s3.head_object(Bucket=self.bucket, Key=content_hash) s3.head_object(Bucket=self.bucket, Key=cid)
return True return True
return await asyncio.to_thread(do_check) return await asyncio.to_thread(do_check)
@@ -854,37 +854,37 @@ class LocalStorageProvider(StorageProvider):
# Create directory if it doesn't exist # Create directory if it doesn't exist
self.base_path.mkdir(parents=True, exist_ok=True) self.base_path.mkdir(parents=True, exist_ok=True)
def _get_file_path(self, content_hash: str) -> Path: def _get_file_path(self, cid: str) -> Path:
"""Get file path for a content hash (using subdirectories).""" """Get file path for a content hash (using subdirectories)."""
# Use first 2 chars as subdirectory for better filesystem performance # Use first 2 chars as subdirectory for better filesystem performance
subdir = content_hash[:2] subdir = cid[:2]
return self.base_path / subdir / content_hash return self.base_path / subdir / cid
async def pin(self, content_hash: str, data: bytes, filename: Optional[str] = None) -> Optional[str]: async def pin(self, cid: str, data: bytes, filename: Optional[str] = None) -> Optional[str]:
"""Store content locally.""" """Store content locally."""
try: try:
import asyncio import asyncio
def do_store(): def do_store():
file_path = self._get_file_path(content_hash) file_path = self._get_file_path(cid)
file_path.parent.mkdir(parents=True, exist_ok=True) file_path.parent.mkdir(parents=True, exist_ok=True)
file_path.write_bytes(data) file_path.write_bytes(data)
return content_hash # Use content_hash as ID for local storage return cid # Use cid as ID for local storage
result = await asyncio.to_thread(do_store) result = await asyncio.to_thread(do_store)
logger.info(f"Local: Stored {content_hash[:16]}...") logger.info(f"Local: Stored {cid[:16]}...")
return result return result
except Exception as e: except Exception as e:
logger.error(f"Local storage failed: {e}") logger.error(f"Local storage failed: {e}")
return None return None
async def unpin(self, content_hash: str) -> bool: async def unpin(self, cid: str) -> bool:
"""Remove content from local storage.""" """Remove content from local storage."""
try: try:
import asyncio import asyncio
def do_remove(): def do_remove():
file_path = self._get_file_path(content_hash) file_path = self._get_file_path(cid)
if file_path.exists(): if file_path.exists():
file_path.unlink() file_path.unlink()
return True return True
@@ -895,13 +895,13 @@ class LocalStorageProvider(StorageProvider):
logger.error(f"Local unpin failed: {e}") logger.error(f"Local unpin failed: {e}")
return False return False
async def get(self, content_hash: str) -> Optional[bytes]: async def get(self, cid: str) -> Optional[bytes]:
"""Get content from local storage.""" """Get content from local storage."""
try: try:
import asyncio import asyncio
def do_get(): def do_get():
file_path = self._get_file_path(content_hash) file_path = self._get_file_path(cid)
if file_path.exists(): if file_path.exists():
return file_path.read_bytes() return file_path.read_bytes()
return None return None
@@ -911,9 +911,9 @@ class LocalStorageProvider(StorageProvider):
logger.error(f"Local get failed: {e}") logger.error(f"Local get failed: {e}")
return None return None
async def is_pinned(self, content_hash: str) -> bool: async def is_pinned(self, cid: str) -> bool:
"""Check if content exists in local storage.""" """Check if content exists in local storage."""
return self._get_file_path(content_hash).exists() return self._get_file_path(cid).exists()
async def test_connection(self) -> tuple[bool, str]: async def test_connection(self) -> tuple[bool, str]:
"""Test local storage is writable.""" """Test local storage is writable."""

View File

@@ -81,8 +81,8 @@ def execute_step(
# Get L1 cache manager (IPFS-backed) # Get L1 cache manager (IPFS-backed)
cache_mgr = get_cache_manager() cache_mgr = get_cache_manager()
# Check if already cached (by cache_id as content_hash) # Check if already cached (by cache_id as cid)
cached_path = cache_mgr.get_by_content_hash(step.cache_id) cached_path = cache_mgr.get_by_cid(step.cache_id)
if cached_path: if cached_path:
logger.info(f"Step {step.step_id} already cached at {cached_path}") logger.info(f"Step {step.step_id} already cached at {cached_path}")
@@ -141,14 +141,14 @@ def execute_step(
try: try:
# Handle SOURCE nodes # Handle SOURCE nodes
if step.node_type == "SOURCE": if step.node_type == "SOURCE":
content_hash = step.config.get("content_hash") cid = step.config.get("cid")
if not content_hash: if not cid:
raise ValueError(f"SOURCE step missing content_hash") raise ValueError(f"SOURCE step missing cid")
# Look up in cache # Look up in cache
path = cache_mgr.get_by_content_hash(content_hash) path = cache_mgr.get_by_cid(cid)
if not path: if not path:
raise ValueError(f"SOURCE input not found in cache: {content_hash[:16]}...") raise ValueError(f"SOURCE input not found in cache: {cid[:16]}...")
output_path = str(path) output_path = str(path)
complete_task(step.cache_id, worker_id, output_path) complete_task(step.cache_id, worker_id, output_path)
@@ -165,7 +165,7 @@ def execute_step(
for item_id in step.config.get("items", []): for item_id in step.config.get("items", []):
item_cache_id = input_cache_ids.get(item_id) item_cache_id = input_cache_ids.get(item_id)
if item_cache_id: if item_cache_id:
path = cache_mgr.get_by_content_hash(item_cache_id) path = cache_mgr.get_by_cid(item_cache_id)
if path: if path:
item_paths.append(str(path)) item_paths.append(str(path))
@@ -190,7 +190,7 @@ def execute_step(
input_cache_id = input_cache_ids.get(input_step_id) input_cache_id = input_cache_ids.get(input_step_id)
if not input_cache_id: if not input_cache_id:
raise ValueError(f"No cache_id for input step: {input_step_id}") raise ValueError(f"No cache_id for input step: {input_step_id}")
path = cache_mgr.get_by_content_hash(input_cache_id) path = cache_mgr.get_by_cid(input_cache_id)
if not path: if not path:
raise ValueError(f"Input not in cache: {input_cache_id[:16]}...") raise ValueError(f"Input not in cache: {input_cache_id[:16]}...")
input_paths.append(Path(path)) input_paths.append(Path(path))
@@ -276,7 +276,7 @@ def execute_step(
"step_id": step.step_id, "step_id": step.step_id,
"cache_id": step.cache_id, "cache_id": step.cache_id,
"output_path": str(cached_file.path), "output_path": str(cached_file.path),
"content_hash": cached_file.content_hash, "cid": cached_file.cid,
"ipfs_cid": ipfs_cid, "ipfs_cid": ipfs_cid,
"filter_count": len(filter_chain), "filter_count": len(filter_chain),
} }
@@ -298,7 +298,7 @@ def execute_step(
if not input_cache_id: if not input_cache_id:
raise ValueError(f"No cache_id for input step: {input_step_id}") raise ValueError(f"No cache_id for input step: {input_step_id}")
path = cache_mgr.get_by_content_hash(input_cache_id) path = cache_mgr.get_by_cid(input_cache_id)
if not path: if not path:
raise ValueError(f"Input not in cache: {input_cache_id[:16]}...") raise ValueError(f"Input not in cache: {input_cache_id[:16]}...")
@@ -336,7 +336,7 @@ def execute_step(
"media_type": output_def.media_type, "media_type": output_def.media_type,
"index": output_def.index, "index": output_def.index,
"path": str(cached_file.path), "path": str(cached_file.path),
"content_hash": cached_file.content_hash, "cid": cached_file.cid,
"ipfs_cid": ipfs_cid, "ipfs_cid": ipfs_cid,
}) })
else: else:
@@ -347,7 +347,7 @@ def execute_step(
"media_type": "video/mp4", "media_type": "video/mp4",
"index": 0, "index": 0,
"path": str(cached_file.path), "path": str(cached_file.path),
"content_hash": cached_file.content_hash, "cid": cached_file.cid,
"ipfs_cid": ipfs_cid, "ipfs_cid": ipfs_cid,
}) })
@@ -362,7 +362,7 @@ def execute_step(
"name": step.name, "name": step.name,
"cache_id": step.cache_id, "cache_id": step.cache_id,
"output_path": str(cached_file.path), "output_path": str(cached_file.path),
"content_hash": cached_file.content_hash, "cid": cached_file.cid,
"ipfs_cid": ipfs_cid, "ipfs_cid": ipfs_cid,
"outputs": outputs, "outputs": outputs,
} }

View File

@@ -140,7 +140,7 @@ def execute_step_sexp(
cache_mgr = get_cache_manager() cache_mgr = get_cache_manager()
# Check if already cached # Check if already cached
cached_path = cache_mgr.get_by_content_hash(cache_id) cached_path = cache_mgr.get_by_cid(cache_id)
if cached_path: if cached_path:
logger.info(f"Step {step_id} already cached at {cached_path}") logger.info(f"Step {step_id} already cached at {cached_path}")
@@ -202,7 +202,7 @@ def execute_step_sexp(
if not content_id: if not content_id:
raise ValueError("SOURCE step missing :cid or :hash") raise ValueError("SOURCE step missing :cid or :hash")
path = cache_mgr.get_by_content_hash(content_id) path = cache_mgr.get_by_cid(content_id)
if not path: if not path:
raise ValueError(f"SOURCE input not found: {content_id[:16]}...") raise ValueError(f"SOURCE input not found: {content_id[:16]}...")
@@ -226,7 +226,7 @@ def execute_step_sexp(
input_paths = [] input_paths = []
for inp in inputs: for inp in inputs:
inp_cache_id = input_cache_ids.get(inp, inp) inp_cache_id = input_cache_ids.get(inp, inp)
path = cache_mgr.get_by_content_hash(inp_cache_id) path = cache_mgr.get_by_cid(inp_cache_id)
if not path: if not path:
raise ValueError(f"Input not found: {inp_cache_id[:16]}...") raise ValueError(f"Input not found: {inp_cache_id[:16]}...")
input_paths.append(Path(path)) input_paths.append(Path(path))
@@ -261,7 +261,7 @@ def execute_step_sexp(
input_paths = [] input_paths = []
for inp in inputs: for inp in inputs:
inp_cache_id = input_cache_ids.get(inp, inp) inp_cache_id = input_cache_ids.get(inp, inp)
path = cache_mgr.get_by_content_hash(inp_cache_id) path = cache_mgr.get_by_cid(inp_cache_id)
if not path: if not path:
raise ValueError(f"Input not found: {inp_cache_id[:16]}...") raise ValueError(f"Input not found: {inp_cache_id[:16]}...")
input_paths.append(Path(path)) input_paths.append(Path(path))
@@ -366,7 +366,7 @@ def execute_step_sexp(
"step_id": step_id, "step_id": step_id,
"cache_id": cache_id, "cache_id": cache_id,
"output_path": str(cached_file.path), "output_path": str(cached_file.path),
"content_hash": cached_file.content_hash, "cid": cached_file.cid,
"ipfs_cid": ipfs_cid, "ipfs_cid": ipfs_cid,
"filter_count": len(filter_chain), "filter_count": len(filter_chain),
} }
@@ -386,7 +386,7 @@ def execute_step_sexp(
input_paths = [] input_paths = []
for inp in inputs: for inp in inputs:
inp_cache_id = input_cache_ids.get(inp, inp) inp_cache_id = input_cache_ids.get(inp, inp)
path = cache_mgr.get_by_content_hash(inp_cache_id) path = cache_mgr.get_by_cid(inp_cache_id)
if not path: if not path:
raise ValueError(f"Input not found: {inp_cache_id[:16]}...") raise ValueError(f"Input not found: {inp_cache_id[:16]}...")
input_paths.append(Path(path)) input_paths.append(Path(path))
@@ -420,7 +420,7 @@ def execute_step_sexp(
"step_id": step_id, "step_id": step_id,
"cache_id": cache_id, "cache_id": cache_id,
"output_path": str(cached_file.path), "output_path": str(cached_file.path),
"content_hash": cached_file.content_hash, "cid": cached_file.cid,
"ipfs_cid": ipfs_cid, "ipfs_cid": ipfs_cid,
} }

View File

@@ -80,8 +80,8 @@ def run_plan(
cache_ids[step.step_id] = step.cache_id cache_ids[step.step_id] = step.cache_id
# Also map input hashes # Also map input hashes
for name, content_hash in plan.input_hashes.items(): for name, cid in plan.input_hashes.items():
cache_ids[name] = content_hash cache_ids[name] = cid
# Group steps by level # Group steps by level
steps_by_level = plan.get_steps_by_level() steps_by_level = plan.get_steps_by_level()
@@ -103,7 +103,7 @@ def run_plan(
for step in level_steps: for step in level_steps:
# Check if cached # Check if cached
cached_path = cache_mgr.get_by_content_hash(step.cache_id) cached_path = cache_mgr.get_by_cid(step.cache_id)
if cached_path: if cached_path:
results_by_step[step.step_id] = { results_by_step[step.step_id] = {
"status": "cached", "status": "cached",
@@ -171,7 +171,7 @@ def run_plan(
output_name = plan.output_name output_name = plan.output_name
if output_cache_id: if output_cache_id:
output_path = cache_mgr.get_by_content_hash(output_cache_id) output_path = cache_mgr.get_by_cid(output_cache_id)
output_ipfs_cid = cache_mgr.get_ipfs_cid(output_cache_id) output_ipfs_cid = cache_mgr.get_ipfs_cid(output_cache_id)
# Build list of all outputs with their names and artifacts # Build list of all outputs with their names and artifacts
@@ -183,7 +183,7 @@ def run_plan(
# If no outputs in result, build from step definition # If no outputs in result, build from step definition
if not step_outputs and step.outputs: if not step_outputs and step.outputs:
for output_def in step.outputs: for output_def in step.outputs:
output_cache_path = cache_mgr.get_by_content_hash(output_def.cache_id) output_cache_path = cache_mgr.get_by_cid(output_def.cache_id)
output_ipfs = cache_mgr.get_ipfs_cid(output_def.cache_id) if output_cache_path else None output_ipfs = cache_mgr.get_ipfs_cid(output_def.cache_id) if output_cache_path else None
all_outputs.append({ all_outputs.append({
"name": output_def.name, "name": output_def.name,
@@ -318,28 +318,28 @@ def run_recipe(
node_id = analysis_node["node_id"] node_id = analysis_node["node_id"]
# Resolve input reference to content hash # Resolve input reference to content hash
content_hash = input_hashes.get(input_ref) cid = input_hashes.get(input_ref)
if not content_hash: if not cid:
logger.warning(f"Analysis node {node_id}: input '{input_ref}' not in input_hashes") logger.warning(f"Analysis node {node_id}: input '{input_ref}' not in input_hashes")
continue continue
path = cache_mgr.get_by_content_hash(content_hash) path = cache_mgr.get_by_cid(cid)
if not path: if not path:
logger.warning(f"Analysis node {node_id}: content {content_hash[:16]}... not in cache") logger.warning(f"Analysis node {node_id}: content {cid[:16]}... not in cache")
continue continue
try: try:
# Run analysis for the specific feature # Run analysis for the specific feature
features = [feature] if feature else ["beats", "energy"] features = [feature] if feature else ["beats", "energy"]
result = analyzer.analyze( result = analyzer.analyze(
input_hash=content_hash, input_hash=cid,
features=features, features=features,
input_path=Path(path), input_path=Path(path),
) )
# Store result keyed by node_id so plan can reference it # Store result keyed by node_id so plan can reference it
analysis_results[node_id] = result analysis_results[node_id] = result
# Also store by content_hash for compatibility # Also store by cid for compatibility
analysis_results[content_hash] = result analysis_results[cid] = result
logger.info(f"Analysis {node_id}: feature={feature}, tempo={result.tempo}") logger.info(f"Analysis {node_id}: feature={feature}, tempo={result.tempo}")
except Exception as e: except Exception as e:
logger.warning(f"Analysis failed for {node_id}: {e}") logger.warning(f"Analysis failed for {node_id}: {e}")
@@ -380,7 +380,7 @@ def run_recipe(
# Store in cache (content-addressed, auto-pins to IPFS) # Store in cache (content-addressed, auto-pins to IPFS)
# Plan is just another node output - no special treatment needed # Plan is just another node output - no special treatment needed
cached, plan_ipfs_cid = cache_mgr.put(tmp_path, node_type="plan", move=True) cached, plan_ipfs_cid = cache_mgr.put(tmp_path, node_type="plan", move=True)
logger.info(f"Plan cached: hash={cached.content_hash}, ipfs={plan_ipfs_cid}") logger.info(f"Plan cached: hash={cached.cid}, ipfs={plan_ipfs_cid}")
# Phase 4: Execute # Phase 4: Execute
logger.info("Phase 4: Executing plan...") logger.info("Phase 4: Executing plan...")
@@ -392,7 +392,7 @@ def run_recipe(
"run_id": run_id, "run_id": run_id,
"recipe": compiled.name, "recipe": compiled.name,
"plan_id": plan.plan_id, "plan_id": plan.plan_id,
"plan_cache_id": cached.content_hash, "plan_cache_id": cached.cid,
"plan_ipfs_cid": plan_ipfs_cid, "plan_ipfs_cid": plan_ipfs_cid,
"output_path": result.get("output_path"), "output_path": result.get("output_path"),
"output_cache_id": result.get("output_cache_id"), "output_cache_id": result.get("output_cache_id"),
@@ -454,21 +454,21 @@ def generate_plan(
feature = analysis_node["feature"] feature = analysis_node["feature"]
node_id = analysis_node["node_id"] node_id = analysis_node["node_id"]
content_hash = input_hashes.get(input_ref) cid = input_hashes.get(input_ref)
if not content_hash: if not cid:
continue continue
path = cache_mgr.get_by_content_hash(content_hash) path = cache_mgr.get_by_cid(cid)
if path: if path:
try: try:
features = [feature] if feature else ["beats", "energy"] features = [feature] if feature else ["beats", "energy"]
result = analyzer.analyze( result = analyzer.analyze(
input_hash=content_hash, input_hash=cid,
features=features, features=features,
input_path=Path(path), input_path=Path(path),
) )
analysis_results[node_id] = result analysis_results[node_id] = result
analysis_results[content_hash] = result analysis_results[cid] = result
except Exception as e: except Exception as e:
logger.warning(f"Analysis failed for {node_id}: {e}") logger.warning(f"Analysis failed for {node_id}: {e}")

View File

@@ -67,7 +67,7 @@ def register_input_cid(
input_path: Local path to the input file input_path: Local path to the input file
Returns: Returns:
Dict with 'cid' and 'content_hash' Dict with 'cid' and 'cid'
""" """
import hashlib import hashlib
@@ -77,7 +77,7 @@ def register_input_cid(
# Compute content hash # Compute content hash
with open(path, "rb") as f: with open(path, "rb") as f:
content_hash = hashlib.sha3_256(f.read()).hexdigest() cid = hashlib.sha3_256(f.read()).hexdigest()
# Add to IPFS # Add to IPFS
cid = ipfs_client.add_file(path) cid = ipfs_client.add_file(path)
@@ -89,7 +89,7 @@ def register_input_cid(
return { return {
"status": "completed", "status": "completed",
"cid": cid, "cid": cid,
"content_hash": content_hash, "cid": cid,
"path": str(path), "path": str(path),
} }
@@ -426,7 +426,7 @@ def run_from_local(
return {"status": "failed", "phase": "register_input", "input": name, "error": result.get("error")} return {"status": "failed", "phase": "register_input", "input": name, "error": result.get("error")}
input_cids[name] = result["cid"] input_cids[name] = result["cid"]
input_hashes[name] = result["content_hash"] input_hashes[name] = result["cid"]
# Run the pipeline # Run the pipeline
return run_recipe_cid.apply_async( return run_recipe_cid.apply_async(

View File

@@ -130,13 +130,13 @@ class TestL2SharedChecker:
class TestL1CacheManagerStorage: class TestL1CacheManagerStorage:
"""Tests for cache storage operations.""" """Tests for cache storage operations."""
def test_put_and_get_by_content_hash(self, manager, temp_dir): def test_put_and_get_by_cid(self, manager, temp_dir):
"""Can store and retrieve by content hash.""" """Can store and retrieve by content hash."""
test_file = create_test_file(temp_dir / "input.txt", "hello world") test_file = create_test_file(temp_dir / "input.txt", "hello world")
cached = manager.put(test_file, node_type="test") cached = manager.put(test_file, node_type="test")
retrieved_path = manager.get_by_content_hash(cached.content_hash) retrieved_path = manager.get_by_cid(cached.cid)
assert retrieved_path is not None assert retrieved_path is not None
assert retrieved_path.read_text() == "hello world" assert retrieved_path.read_text() == "hello world"
@@ -155,7 +155,7 @@ class TestL1CacheManagerStorage:
cached = manager.put(test_file, node_type="test") cached = manager.put(test_file, node_type="test")
assert manager.has_content(cached.content_hash) is True assert manager.has_content(cached.cid) is True
assert manager.has_content("nonexistent") is False assert manager.has_content("nonexistent") is False
def test_list_all(self, manager, temp_dir): def test_list_all(self, manager, temp_dir):
@@ -177,7 +177,7 @@ class TestL1CacheManagerStorage:
cached1 = manager.put(f1, node_type="test") cached1 = manager.put(f1, node_type="test")
cached2 = manager.put(f2, node_type="test") cached2 = manager.put(f2, node_type="test")
assert cached1.content_hash == cached2.content_hash assert cached1.cid == cached2.cid
assert len(manager.list_all()) == 1 assert len(manager.list_all()) == 1
@@ -193,14 +193,14 @@ class TestL1CacheManagerActivities:
output_cached = manager.put(output_file, node_type="effect") output_cached = manager.put(output_file, node_type="effect")
activity = manager.record_simple_activity( activity = manager.record_simple_activity(
input_hashes=[input_cached.content_hash], input_hashes=[input_cached.cid],
output_hash=output_cached.content_hash, output_cid=output_cached.cid,
run_id="run-001", run_id="run-001",
) )
assert activity.activity_id == "run-001" assert activity.activity_id == "run-001"
assert input_cached.content_hash in activity.input_ids assert input_cached.cid in activity.input_ids
assert activity.output_id == output_cached.content_hash assert activity.output_id == output_cached.cid
def test_list_activities(self, manager, temp_dir): def test_list_activities(self, manager, temp_dir):
"""Can list all activities.""" """Can list all activities."""
@@ -209,7 +209,7 @@ class TestL1CacheManagerActivities:
out = create_test_file(temp_dir / f"out{i}.txt", f"output{i}") out = create_test_file(temp_dir / f"out{i}.txt", f"output{i}")
inp_c = manager.put(inp, node_type="source") inp_c = manager.put(inp, node_type="source")
out_c = manager.put(out, node_type="effect") out_c = manager.put(out, node_type="effect")
manager.record_simple_activity([inp_c.content_hash], out_c.content_hash) manager.record_simple_activity([inp_c.cid], out_c.cid)
activities = manager.list_activities() activities = manager.list_activities()
assert len(activities) == 3 assert len(activities) == 3
@@ -225,10 +225,10 @@ class TestL1CacheManagerActivities:
out1_c = manager.put(out1, node_type="effect") out1_c = manager.put(out1, node_type="effect")
out2_c = manager.put(out2, node_type="effect") out2_c = manager.put(out2, node_type="effect")
manager.record_simple_activity([input_cached.content_hash], out1_c.content_hash, "run1") manager.record_simple_activity([input_cached.cid], out1_c.cid, "run1")
manager.record_simple_activity([input_cached.content_hash], out2_c.content_hash, "run2") manager.record_simple_activity([input_cached.cid], out2_c.cid, "run2")
found = manager.find_activities_by_inputs([input_cached.content_hash]) found = manager.find_activities_by_inputs([input_cached.cid])
assert len(found) == 2 assert len(found) == 2
@@ -240,7 +240,7 @@ class TestL1CacheManagerDeletionRules:
test_file = create_test_file(temp_dir / "orphan.txt", "orphan") test_file = create_test_file(temp_dir / "orphan.txt", "orphan")
cached = manager.put(test_file, node_type="test") cached = manager.put(test_file, node_type="test")
can_delete, reason = manager.can_delete(cached.content_hash) can_delete, reason = manager.can_delete(cached.cid)
assert can_delete is True assert can_delete is True
def test_cannot_delete_activity_input(self, manager, temp_dir): def test_cannot_delete_activity_input(self, manager, temp_dir):
@@ -252,11 +252,11 @@ class TestL1CacheManagerDeletionRules:
output_cached = manager.put(output_file, node_type="effect") output_cached = manager.put(output_file, node_type="effect")
manager.record_simple_activity( manager.record_simple_activity(
[input_cached.content_hash], [input_cached.cid],
output_cached.content_hash, output_cached.cid,
) )
can_delete, reason = manager.can_delete(input_cached.content_hash) can_delete, reason = manager.can_delete(input_cached.cid)
assert can_delete is False assert can_delete is False
assert "input" in reason.lower() assert "input" in reason.lower()
@@ -269,11 +269,11 @@ class TestL1CacheManagerDeletionRules:
output_cached = manager.put(output_file, node_type="effect") output_cached = manager.put(output_file, node_type="effect")
manager.record_simple_activity( manager.record_simple_activity(
[input_cached.content_hash], [input_cached.cid],
output_cached.content_hash, output_cached.cid,
) )
can_delete, reason = manager.can_delete(output_cached.content_hash) can_delete, reason = manager.can_delete(output_cached.cid)
assert can_delete is False assert can_delete is False
assert "output" in reason.lower() assert "output" in reason.lower()
@@ -283,9 +283,9 @@ class TestL1CacheManagerDeletionRules:
cached = manager.put(test_file, node_type="test") cached = manager.put(test_file, node_type="test")
# Mark as pinned (published) # Mark as pinned (published)
manager.pin(cached.content_hash, reason="published") manager.pin(cached.cid, reason="published")
can_delete, reason = manager.can_delete(cached.content_hash) can_delete, reason = manager.can_delete(cached.cid)
assert can_delete is False assert can_delete is False
assert "pinned" in reason assert "pinned" in reason
@@ -294,10 +294,10 @@ class TestL1CacheManagerDeletionRules:
test_file = create_test_file(temp_dir / "orphan.txt", "orphan") test_file = create_test_file(temp_dir / "orphan.txt", "orphan")
cached = manager.put(test_file, node_type="test") cached = manager.put(test_file, node_type="test")
success, msg = manager.delete_by_content_hash(cached.content_hash) success, msg = manager.delete_by_cid(cached.cid)
assert success is True assert success is True
assert manager.has_content(cached.content_hash) is False assert manager.has_content(cached.cid) is False
def test_delete_protected_item_fails(self, manager, temp_dir): def test_delete_protected_item_fails(self, manager, temp_dir):
"""Cannot delete protected items.""" """Cannot delete protected items."""
@@ -308,14 +308,14 @@ class TestL1CacheManagerDeletionRules:
output_cached = manager.put(output_file, node_type="effect") output_cached = manager.put(output_file, node_type="effect")
manager.record_simple_activity( manager.record_simple_activity(
[input_cached.content_hash], [input_cached.cid],
output_cached.content_hash, output_cached.cid,
) )
success, msg = manager.delete_by_content_hash(input_cached.content_hash) success, msg = manager.delete_by_cid(input_cached.cid)
assert success is False assert success is False
assert manager.has_content(input_cached.content_hash) is True assert manager.has_content(input_cached.cid) is True
class TestL1CacheManagerActivityDiscard: class TestL1CacheManagerActivityDiscard:
@@ -330,8 +330,8 @@ class TestL1CacheManagerActivityDiscard:
output_cached = manager.put(output_file, node_type="effect") output_cached = manager.put(output_file, node_type="effect")
activity = manager.record_simple_activity( activity = manager.record_simple_activity(
[input_cached.content_hash], [input_cached.cid],
output_cached.content_hash, output_cached.cid,
"run-001", "run-001",
) )
@@ -347,13 +347,13 @@ class TestL1CacheManagerActivityDiscard:
output_cached = manager.put(output_file, node_type="effect") output_cached = manager.put(output_file, node_type="effect")
manager.record_simple_activity( manager.record_simple_activity(
[input_cached.content_hash], [input_cached.cid],
output_cached.content_hash, output_cached.cid,
"run-001", "run-001",
) )
# Mark output as pinned (published) # Mark output as pinned (published)
manager.pin(output_cached.content_hash, reason="published") manager.pin(output_cached.cid, reason="published")
can_discard, reason = manager.can_discard_activity("run-001") can_discard, reason = manager.can_discard_activity("run-001")
assert can_discard is False assert can_discard is False
@@ -368,8 +368,8 @@ class TestL1CacheManagerActivityDiscard:
output_cached = manager.put(output_file, node_type="effect") output_cached = manager.put(output_file, node_type="effect")
manager.record_simple_activity( manager.record_simple_activity(
[input_cached.content_hash], [input_cached.cid],
output_cached.content_hash, output_cached.cid,
"run-001", "run-001",
) )