Store cache items by IPFS CID, index by cache_id

- Files in /data/cache/nodes/ are now stored by IPFS CID only
- The cache_id parameter creates an index entry mapping cache_id -> IPFS CID
- The node_id parameter is now deprecated and ignored; entries are always keyed by IPFS CID
- get_by_cid(cache_id) still works via index lookup

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-13 04:20:34 +00:00
parent c46fcd2308
commit d7d7cd28c2
2 changed files with 28 additions and 34 deletions

View File

@@ -267,16 +267,21 @@ class L1CacheManager:
source_path: Path,
node_type: str = "upload",
node_id: str = None,
cache_id: str = None,
execution_time: float = 0.0,
move: bool = False,
) -> tuple[CachedFile, Optional[str]]:
"""
Store a file in the cache and upload to IPFS.
Files are ALWAYS stored by IPFS CID. The cache_id parameter creates
an index from cache_id -> IPFS CID for code-addressed lookups.
Args:
source_path: Path to file to cache
node_type: Type of node (e.g., "upload", "source", "effect")
node_id: Optional node_id; if not provided, uses CID
node_id: DEPRECATED - ignored, always uses IPFS CID
cache_id: Optional code-addressed cache ID to index
execution_time: How long the operation took
move: If True, move instead of copy
@@ -288,9 +293,8 @@ class L1CacheManager:
if not cid:
raise RuntimeError(f"IPFS upload failed for {source_path}. IPFS is required.")
# Use CID as node_id if not provided
if node_id is None:
node_id = cid
# Always store by IPFS CID (node_id parameter is deprecated)
node_id = cid
# Check if already cached (by node_id)
existing = self.cache.get_entry(node_id)
@@ -319,21 +323,16 @@ class L1CacheManager:
verify_path = self.cache.get(node_id)
logger.info(f"put: Verify cache.get(node_id={node_id[:16]}...) = {verify_path}")
# Update content index (CID -> node_id mapping)
self._set_content_index(cid, node_id)
logger.info(f"put: Set content index {cid[:16]}... -> {node_id[:16]}...")
# Index by cache_id if provided (code-addressed cache lookup)
# This allows get_by_cid(cache_id) to find files stored by IPFS CID
if cache_id and cache_id != cid:
self._set_content_index(cache_id, cid)
logger.info(f"put: Indexed cache_id {cache_id[:16]}... -> IPFS {cid}")
# Also index by node_id itself (for code-addressed cache lookups)
# This allows get_by_cid(cache_id) to work when cache_id != IPFS CID
if node_id != cid:
self._set_content_index(node_id, node_id)
logger.debug(f"Self-indexed: {node_id[:16]}... -> {node_id[:16]}...")
# Also index by local hash if cid is an IPFS CID
# This ensures both IPFS CID and local hash can be used to find the file
# Also index by local hash for content-based lookup
if local_hash and local_hash != cid:
self._set_content_index(local_hash, node_id)
logger.debug(f"Dual-indexed: {local_hash[:16]}... -> {node_id}")
self._set_content_index(local_hash, cid)
logger.debug(f"Indexed local hash {local_hash[:16]}... -> IPFS {cid}")
logger.info(f"Cached: {cid[:16]}...")