Rename content_hash/output_hash to cid throughout
Refactor to use IPFS CID as the primary content identifier: - Update database schema: content_hash -> cid, output_hash -> output_cid - Update all services, routers, and tasks to use cid terminology - Update HTML templates to display CID instead of hash - Update cache_manager parameter names - Update README documentation This completes the transition to CID-only content addressing. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
259
cache_manager.py
259
cache_manager.py
@@ -3,7 +3,7 @@
|
||||
Cache management for Art DAG L1 server.
|
||||
|
||||
Integrates artdag's Cache, ActivityStore, and ActivityManager to provide:
|
||||
- Content-addressed caching with both node_id and content_hash
|
||||
- Content-addressed caching with both node_id and cid
|
||||
- Activity tracking for runs (input/output/intermediate relationships)
|
||||
- Deletion rules enforcement (shared items protected)
|
||||
- L2 ActivityPub integration for "shared" status checks
|
||||
@@ -35,7 +35,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def file_hash(path: Path, algorithm: str = "sha3_256") -> str:
|
||||
"""Compute SHA3-256 hash of a file."""
|
||||
"""Compute local content hash (fallback when IPFS unavailable)."""
|
||||
hasher = hashlib.new(algorithm)
|
||||
actual_path = path.resolve() if path.is_symlink() else path
|
||||
with open(actual_path, "rb") as f:
|
||||
@@ -51,10 +51,10 @@ class CachedFile:
|
||||
|
||||
Provides a unified view combining:
|
||||
- node_id: computation identity (for DAG caching)
|
||||
- content_hash: file content identity (for external references)
|
||||
- cid: file content identity (for external references)
|
||||
"""
|
||||
node_id: str
|
||||
content_hash: str
|
||||
cid: str
|
||||
path: Path
|
||||
size_bytes: int
|
||||
node_type: str
|
||||
@@ -64,7 +64,7 @@ class CachedFile:
|
||||
def from_cache_entry(cls, entry: CacheEntry) -> "CachedFile":
|
||||
return cls(
|
||||
node_id=entry.node_id,
|
||||
content_hash=entry.content_hash,
|
||||
cid=entry.cid,
|
||||
path=entry.output_path,
|
||||
size_bytes=entry.size_bytes,
|
||||
node_type=entry.node_type,
|
||||
@@ -84,41 +84,41 @@ class L2SharedChecker:
|
||||
self.cache_ttl = cache_ttl
|
||||
self._cache: Dict[str, tuple[bool, float]] = {}
|
||||
|
||||
def is_shared(self, content_hash: str) -> bool:
|
||||
"""Check if content_hash has been published to L2."""
|
||||
def is_shared(self, cid: str) -> bool:
|
||||
"""Check if cid has been published to L2."""
|
||||
import time
|
||||
now = time.time()
|
||||
|
||||
# Check cache
|
||||
if content_hash in self._cache:
|
||||
is_shared, cached_at = self._cache[content_hash]
|
||||
if cid in self._cache:
|
||||
is_shared, cached_at = self._cache[cid]
|
||||
if now - cached_at < self.cache_ttl:
|
||||
logger.debug(f"L2 check (cached): {content_hash[:16]}... = {is_shared}")
|
||||
logger.debug(f"L2 check (cached): {cid[:16]}... = {is_shared}")
|
||||
return is_shared
|
||||
|
||||
# Query L2
|
||||
try:
|
||||
url = f"{self.l2_server}/assets/by-hash/{content_hash}"
|
||||
url = f"{self.l2_server}/assets/by-hash/{cid}"
|
||||
logger.info(f"L2 check: GET {url}")
|
||||
resp = requests.get(url, timeout=5)
|
||||
logger.info(f"L2 check response: {resp.status_code}")
|
||||
is_shared = resp.status_code == 200
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to check L2 for {content_hash}: {e}")
|
||||
logger.warning(f"Failed to check L2 for {cid}: {e}")
|
||||
# On error, assume IS shared (safer - prevents accidental deletion)
|
||||
is_shared = True
|
||||
|
||||
self._cache[content_hash] = (is_shared, now)
|
||||
self._cache[cid] = (is_shared, now)
|
||||
return is_shared
|
||||
|
||||
def invalidate(self, content_hash: str):
|
||||
"""Invalidate cache for a content_hash (call after publishing)."""
|
||||
self._cache.pop(content_hash, None)
|
||||
def invalidate(self, cid: str):
|
||||
"""Invalidate cache for a cid (call after publishing)."""
|
||||
self._cache.pop(cid, None)
|
||||
|
||||
def mark_shared(self, content_hash: str):
|
||||
def mark_shared(self, cid: str):
|
||||
"""Mark as shared without querying (call after successful publish)."""
|
||||
import time
|
||||
self._cache[content_hash] = (True, time.time())
|
||||
self._cache[cid] = (True, time.time())
|
||||
|
||||
|
||||
class L1CacheManager:
|
||||
@@ -131,7 +131,7 @@ class L1CacheManager:
|
||||
- ActivityManager for deletion rules
|
||||
- L2 integration for shared status
|
||||
|
||||
Provides both node_id and content_hash based access.
|
||||
Provides both node_id and cid based access.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -162,16 +162,16 @@ class L1CacheManager:
|
||||
is_shared_fn=self._is_shared_by_node_id,
|
||||
)
|
||||
|
||||
# Content hash index: content_hash -> node_id
|
||||
# Content hash index: cid -> node_id
|
||||
# Uses Redis if available, falls back to in-memory dict
|
||||
self._content_index: Dict[str, str] = {}
|
||||
self._load_content_index()
|
||||
|
||||
# IPFS CID index: content_hash -> ipfs_cid
|
||||
# IPFS CID index: cid -> ipfs_cid
|
||||
self._ipfs_cids: Dict[str, str] = {}
|
||||
self._load_ipfs_index()
|
||||
|
||||
# Legacy files directory (for files uploaded directly by content_hash)
|
||||
# Legacy files directory (for files uploaded directly by cid)
|
||||
self.legacy_dir = self.cache_dir / "legacy"
|
||||
self.legacy_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -179,7 +179,7 @@ class L1CacheManager:
|
||||
return self.cache_dir / "content_index.json"
|
||||
|
||||
def _load_content_index(self):
|
||||
"""Load content_hash -> node_id index from Redis or JSON file."""
|
||||
"""Load cid -> node_id index from Redis or JSON file."""
|
||||
# If Redis available and has data, use it
|
||||
if self._redis:
|
||||
try:
|
||||
@@ -206,8 +206,8 @@ class L1CacheManager:
|
||||
|
||||
# Also index from existing cache entries
|
||||
for entry in self.cache.list_entries():
|
||||
if entry.content_hash:
|
||||
self._content_index[entry.content_hash] = entry.node_id
|
||||
if entry.cid:
|
||||
self._content_index[entry.cid] = entry.node_id
|
||||
|
||||
# Migrate to Redis if available
|
||||
if self._redis and self._content_index:
|
||||
@@ -218,39 +218,39 @@ class L1CacheManager:
|
||||
logger.warning(f"Failed to migrate content index to Redis: {e}")
|
||||
|
||||
def _save_content_index(self):
|
||||
"""Save content_hash -> node_id index to Redis and JSON file."""
|
||||
"""Save cid -> node_id index to Redis and JSON file."""
|
||||
# Always save to JSON as backup
|
||||
with open(self._index_path(), "w") as f:
|
||||
json.dump(self._content_index, f, indent=2)
|
||||
|
||||
def _set_content_index(self, content_hash: str, node_id: str):
|
||||
def _set_content_index(self, cid: str, node_id: str):
|
||||
"""Set a single content index entry (Redis + in-memory)."""
|
||||
self._content_index[content_hash] = node_id
|
||||
self._content_index[cid] = node_id
|
||||
if self._redis:
|
||||
try:
|
||||
self._redis.hset(self._redis_content_key, content_hash, node_id)
|
||||
self._redis.hset(self._redis_content_key, cid, node_id)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to set content index in Redis: {e}")
|
||||
self._save_content_index()
|
||||
|
||||
def _get_content_index(self, content_hash: str) -> Optional[str]:
|
||||
def _get_content_index(self, cid: str) -> Optional[str]:
|
||||
"""Get a content index entry (Redis-first, then in-memory)."""
|
||||
if self._redis:
|
||||
try:
|
||||
val = self._redis.hget(self._redis_content_key, content_hash)
|
||||
val = self._redis.hget(self._redis_content_key, cid)
|
||||
if val:
|
||||
return val.decode() if isinstance(val, bytes) else val
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get content index from Redis: {e}")
|
||||
return self._content_index.get(content_hash)
|
||||
return self._content_index.get(cid)
|
||||
|
||||
def _del_content_index(self, content_hash: str):
|
||||
def _del_content_index(self, cid: str):
|
||||
"""Delete a content index entry."""
|
||||
if content_hash in self._content_index:
|
||||
del self._content_index[content_hash]
|
||||
if cid in self._content_index:
|
||||
del self._content_index[cid]
|
||||
if self._redis:
|
||||
try:
|
||||
self._redis.hdel(self._redis_content_key, content_hash)
|
||||
self._redis.hdel(self._redis_content_key, cid)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to delete content index from Redis: {e}")
|
||||
self._save_content_index()
|
||||
@@ -259,7 +259,7 @@ class L1CacheManager:
|
||||
return self.cache_dir / "ipfs_index.json"
|
||||
|
||||
def _load_ipfs_index(self):
|
||||
"""Load content_hash -> ipfs_cid index from Redis or JSON file."""
|
||||
"""Load cid -> ipfs_cid index from Redis or JSON file."""
|
||||
# If Redis available and has data, use it
|
||||
if self._redis:
|
||||
try:
|
||||
@@ -293,71 +293,71 @@ class L1CacheManager:
|
||||
logger.warning(f"Failed to migrate IPFS index to Redis: {e}")
|
||||
|
||||
def _save_ipfs_index(self):
|
||||
"""Save content_hash -> ipfs_cid index to JSON file (backup)."""
|
||||
"""Save cid -> ipfs_cid index to JSON file (backup)."""
|
||||
with open(self._ipfs_index_path(), "w") as f:
|
||||
json.dump(self._ipfs_cids, f, indent=2)
|
||||
|
||||
def _set_ipfs_index(self, content_hash: str, ipfs_cid: str):
|
||||
def _set_ipfs_index(self, cid: str, ipfs_cid: str):
|
||||
"""Set a single IPFS index entry (Redis + in-memory)."""
|
||||
self._ipfs_cids[content_hash] = ipfs_cid
|
||||
self._ipfs_cids[cid] = ipfs_cid
|
||||
if self._redis:
|
||||
try:
|
||||
self._redis.hset(self._redis_ipfs_key, content_hash, ipfs_cid)
|
||||
self._redis.hset(self._redis_ipfs_key, cid, ipfs_cid)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to set IPFS index in Redis: {e}")
|
||||
self._save_ipfs_index()
|
||||
|
||||
def _get_ipfs_cid_from_index(self, content_hash: str) -> Optional[str]:
|
||||
def _get_ipfs_cid_from_index(self, cid: str) -> Optional[str]:
|
||||
"""Get IPFS CID from index (Redis-first, then in-memory)."""
|
||||
if self._redis:
|
||||
try:
|
||||
val = self._redis.hget(self._redis_ipfs_key, content_hash)
|
||||
val = self._redis.hget(self._redis_ipfs_key, cid)
|
||||
if val:
|
||||
return val.decode() if isinstance(val, bytes) else val
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get IPFS CID from Redis: {e}")
|
||||
return self._ipfs_cids.get(content_hash)
|
||||
return self._ipfs_cids.get(cid)
|
||||
|
||||
def get_ipfs_cid(self, content_hash: str) -> Optional[str]:
|
||||
def get_ipfs_cid(self, cid: str) -> Optional[str]:
|
||||
"""Get IPFS CID for a content hash."""
|
||||
return self._get_ipfs_cid_from_index(content_hash)
|
||||
return self._get_ipfs_cid_from_index(cid)
|
||||
|
||||
def _is_shared_by_node_id(self, content_hash: str) -> bool:
|
||||
"""Check if a content_hash is shared via L2."""
|
||||
return self.l2_checker.is_shared(content_hash)
|
||||
def _is_shared_by_node_id(self, cid: str) -> bool:
|
||||
"""Check if a cid is shared via L2."""
|
||||
return self.l2_checker.is_shared(cid)
|
||||
|
||||
def _load_meta(self, content_hash: str) -> dict:
|
||||
def _load_meta(self, cid: str) -> dict:
|
||||
"""Load metadata for a cached file."""
|
||||
meta_path = self.cache_dir / f"{content_hash}.meta.json"
|
||||
meta_path = self.cache_dir / f"{cid}.meta.json"
|
||||
if meta_path.exists():
|
||||
with open(meta_path) as f:
|
||||
return json.load(f)
|
||||
return {}
|
||||
|
||||
def is_pinned(self, content_hash: str) -> tuple[bool, str]:
|
||||
def is_pinned(self, cid: str) -> tuple[bool, str]:
|
||||
"""
|
||||
Check if a content_hash is pinned (non-deletable).
|
||||
Check if a cid is pinned (non-deletable).
|
||||
|
||||
Returns:
|
||||
(is_pinned, reason) tuple
|
||||
"""
|
||||
meta = self._load_meta(content_hash)
|
||||
meta = self._load_meta(cid)
|
||||
if meta.get("pinned"):
|
||||
return True, meta.get("pin_reason", "published")
|
||||
return False, ""
|
||||
|
||||
def _save_meta(self, content_hash: str, **updates) -> dict:
|
||||
def _save_meta(self, cid: str, **updates) -> dict:
|
||||
"""Save/update metadata for a cached file."""
|
||||
meta = self._load_meta(content_hash)
|
||||
meta = self._load_meta(cid)
|
||||
meta.update(updates)
|
||||
meta_path = self.cache_dir / f"{content_hash}.meta.json"
|
||||
meta_path = self.cache_dir / f"{cid}.meta.json"
|
||||
with open(meta_path, "w") as f:
|
||||
json.dump(meta, f, indent=2)
|
||||
return meta
|
||||
|
||||
def pin(self, content_hash: str, reason: str = "published") -> None:
|
||||
def pin(self, cid: str, reason: str = "published") -> None:
|
||||
"""Mark an item as pinned (non-deletable)."""
|
||||
self._save_meta(content_hash, pinned=True, pin_reason=reason)
|
||||
self._save_meta(cid, pinned=True, pin_reason=reason)
|
||||
|
||||
# ============ File Storage ============
|
||||
|
||||
@@ -375,31 +375,28 @@ class L1CacheManager:
|
||||
Args:
|
||||
source_path: Path to file to cache
|
||||
node_type: Type of node (e.g., "upload", "source", "effect")
|
||||
node_id: Optional node_id; if not provided, uses content_hash
|
||||
node_id: Optional node_id; if not provided, uses CID
|
||||
execution_time: How long the operation took
|
||||
move: If True, move instead of copy
|
||||
|
||||
Returns:
|
||||
Tuple of (CachedFile with both node_id and content_hash, IPFS CID or None)
|
||||
Tuple of (CachedFile with both node_id and cid, CID)
|
||||
"""
|
||||
# Compute content hash first
|
||||
content_hash = file_hash(source_path)
|
||||
# Upload to IPFS first to get the CID (primary identifier)
|
||||
cid = ipfs_client.add_file(source_path)
|
||||
if not cid:
|
||||
# Fallback to local hash if IPFS unavailable
|
||||
cid = file_hash(source_path)
|
||||
logger.warning(f"IPFS unavailable, using local hash: {cid[:16]}...")
|
||||
|
||||
# Use content_hash as node_id if not provided
|
||||
# This is for legacy/uploaded files that don't have a DAG node
|
||||
# Use CID as node_id if not provided
|
||||
if node_id is None:
|
||||
node_id = content_hash
|
||||
node_id = cid
|
||||
|
||||
# Check if already cached (by node_id)
|
||||
existing = self.cache.get_entry(node_id)
|
||||
if existing and existing.output_path.exists():
|
||||
# Already cached - still try to get IPFS CID if we don't have it
|
||||
ipfs_cid = self._get_ipfs_cid_from_index(content_hash)
|
||||
if not ipfs_cid:
|
||||
ipfs_cid = ipfs_client.add_file(existing.output_path)
|
||||
if ipfs_cid:
|
||||
self._set_ipfs_index(content_hash, ipfs_cid)
|
||||
return CachedFile.from_cache_entry(existing), ipfs_cid
|
||||
return CachedFile.from_cache_entry(existing), cid
|
||||
|
||||
# Store in local cache
|
||||
self.cache.put(
|
||||
@@ -412,16 +409,12 @@ class L1CacheManager:
|
||||
|
||||
entry = self.cache.get_entry(node_id)
|
||||
|
||||
# Update content index (Redis + local)
|
||||
self._set_content_index(entry.content_hash, node_id)
|
||||
# Update content index (CID -> node_id mapping)
|
||||
self._set_content_index(cid, node_id)
|
||||
|
||||
# Upload to IPFS (async in background would be better, but sync for now)
|
||||
ipfs_cid = ipfs_client.add_file(entry.output_path)
|
||||
if ipfs_cid:
|
||||
self._set_ipfs_index(entry.content_hash, ipfs_cid)
|
||||
logger.info(f"Uploaded to IPFS: {entry.content_hash[:16]}... -> {ipfs_cid}")
|
||||
logger.info(f"Cached: {cid[:16]}...")
|
||||
|
||||
return CachedFile.from_cache_entry(entry), ipfs_cid
|
||||
return CachedFile.from_cache_entry(entry), cid
|
||||
|
||||
def get_by_node_id(self, node_id: str) -> Optional[Path]:
|
||||
"""Get cached file path by node_id."""
|
||||
@@ -432,46 +425,46 @@ class L1CacheManager:
|
||||
# CIDv0 starts with "Qm", CIDv1 starts with "bafy" or other multibase prefixes
|
||||
return identifier.startswith("Qm") or identifier.startswith("bafy") or identifier.startswith("baf")
|
||||
|
||||
def get_by_content_hash(self, content_hash: str) -> Optional[Path]:
|
||||
"""Get cached file path by content_hash or IPFS CID. Falls back to IPFS if not in local cache."""
|
||||
def get_by_cid(self, cid: str) -> Optional[Path]:
|
||||
"""Get cached file path by cid or IPFS CID. Falls back to IPFS if not in local cache."""
|
||||
|
||||
# If it looks like an IPFS CID, use get_by_cid instead
|
||||
if self._is_ipfs_cid(content_hash):
|
||||
return self.get_by_cid(content_hash)
|
||||
if self._is_ipfs_cid(cid):
|
||||
return self.get_by_cid(cid)
|
||||
|
||||
# Check index first (Redis then local)
|
||||
node_id = self._get_content_index(content_hash)
|
||||
node_id = self._get_content_index(cid)
|
||||
if node_id:
|
||||
path = self.cache.get(node_id)
|
||||
if path and path.exists():
|
||||
logger.debug(f" Found via index: {path}")
|
||||
return path
|
||||
|
||||
# For uploads, node_id == content_hash, so try direct lookup
|
||||
# For uploads, node_id == cid, so try direct lookup
|
||||
# This works even if cache index hasn't been reloaded
|
||||
path = self.cache.get(content_hash)
|
||||
logger.debug(f" cache.get({content_hash[:16]}...) returned: {path}")
|
||||
path = self.cache.get(cid)
|
||||
logger.debug(f" cache.get({cid[:16]}...) returned: {path}")
|
||||
if path and path.exists():
|
||||
self._set_content_index(content_hash, content_hash)
|
||||
self._set_content_index(cid, cid)
|
||||
return path
|
||||
|
||||
# Scan cache entries (fallback for new structure)
|
||||
entry = self.cache.find_by_content_hash(content_hash)
|
||||
entry = self.cache.find_by_cid(cid)
|
||||
if entry and entry.output_path.exists():
|
||||
logger.debug(f" Found via scan: {entry.output_path}")
|
||||
self._set_content_index(content_hash, entry.node_id)
|
||||
self._set_content_index(cid, entry.node_id)
|
||||
return entry.output_path
|
||||
|
||||
# Check legacy location (files stored directly as CACHE_DIR/{content_hash})
|
||||
legacy_path = self.cache_dir / content_hash
|
||||
# Check legacy location (files stored directly as CACHE_DIR/{cid})
|
||||
legacy_path = self.cache_dir / cid
|
||||
if legacy_path.exists() and legacy_path.is_file():
|
||||
return legacy_path
|
||||
|
||||
# Try to recover from IPFS if we have a CID
|
||||
ipfs_cid = self._get_ipfs_cid_from_index(content_hash)
|
||||
ipfs_cid = self._get_ipfs_cid_from_index(cid)
|
||||
if ipfs_cid:
|
||||
logger.info(f"Recovering from IPFS: {content_hash[:16]}... ({ipfs_cid})")
|
||||
recovery_path = self.legacy_dir / content_hash
|
||||
logger.info(f"Recovering from IPFS: {cid[:16]}... ({ipfs_cid})")
|
||||
recovery_path = self.legacy_dir / cid
|
||||
if ipfs_client.get_file(ipfs_cid, recovery_path):
|
||||
logger.info(f"Recovered from IPFS: {recovery_path}")
|
||||
return recovery_path
|
||||
@@ -504,16 +497,16 @@ class L1CacheManager:
|
||||
|
||||
return None
|
||||
|
||||
def has_content(self, content_hash: str) -> bool:
|
||||
def has_content(self, cid: str) -> bool:
|
||||
"""Check if content exists in cache."""
|
||||
return self.get_by_content_hash(content_hash) is not None
|
||||
return self.get_by_cid(cid) is not None
|
||||
|
||||
def get_entry_by_content_hash(self, content_hash: str) -> Optional[CacheEntry]:
|
||||
"""Get cache entry by content_hash."""
|
||||
node_id = self._get_content_index(content_hash)
|
||||
def get_entry_by_cid(self, cid: str) -> Optional[CacheEntry]:
|
||||
"""Get cache entry by cid."""
|
||||
node_id = self._get_content_index(cid)
|
||||
if node_id:
|
||||
return self.cache.get_entry(node_id)
|
||||
return self.cache.find_by_content_hash(content_hash)
|
||||
return self.cache.find_by_cid(cid)
|
||||
|
||||
def list_all(self) -> List[CachedFile]:
|
||||
"""List all cached files."""
|
||||
@@ -523,11 +516,11 @@ class L1CacheManager:
|
||||
# New cache structure entries
|
||||
for entry in self.cache.list_entries():
|
||||
files.append(CachedFile.from_cache_entry(entry))
|
||||
if entry.content_hash:
|
||||
seen_hashes.add(entry.content_hash)
|
||||
if entry.cid:
|
||||
seen_hashes.add(entry.cid)
|
||||
|
||||
# Legacy files stored directly in cache_dir (old structure)
|
||||
# These are files named by content_hash directly in CACHE_DIR
|
||||
# These are files named by cid directly in CACHE_DIR
|
||||
for f in self.cache_dir.iterdir():
|
||||
# Skip directories and special files
|
||||
if not f.is_file():
|
||||
@@ -544,7 +537,7 @@ class L1CacheManager:
|
||||
|
||||
files.append(CachedFile(
|
||||
node_id=f.name,
|
||||
content_hash=f.name,
|
||||
cid=f.name,
|
||||
path=f,
|
||||
size_bytes=f.stat().st_size,
|
||||
node_type="legacy",
|
||||
@@ -566,8 +559,8 @@ class L1CacheManager:
|
||||
"""
|
||||
hashes = []
|
||||
for entry in self.cache.list_entries():
|
||||
if entry.node_type == node_type and entry.content_hash:
|
||||
hashes.append(entry.content_hash)
|
||||
if entry.node_type == node_type and entry.cid:
|
||||
hashes.append(entry.cid)
|
||||
return hashes
|
||||
|
||||
# ============ Activity Tracking ============
|
||||
@@ -590,19 +583,19 @@ class L1CacheManager:
|
||||
def record_simple_activity(
|
||||
self,
|
||||
input_hashes: List[str],
|
||||
output_hash: str,
|
||||
output_cid: str,
|
||||
run_id: str = None,
|
||||
) -> Activity:
|
||||
"""
|
||||
Record a simple (non-DAG) execution as an activity.
|
||||
|
||||
For legacy single-effect runs that don't use full DAG execution.
|
||||
Uses content_hash as node_id.
|
||||
Uses cid as node_id.
|
||||
"""
|
||||
activity = Activity(
|
||||
activity_id=run_id or str(hash((tuple(input_hashes), output_hash))),
|
||||
activity_id=run_id or str(hash((tuple(input_hashes), output_cid))),
|
||||
input_ids=sorted(input_hashes),
|
||||
output_id=output_hash,
|
||||
output_id=output_cid,
|
||||
intermediate_ids=[],
|
||||
created_at=datetime.now(timezone.utc).timestamp(),
|
||||
status="completed",
|
||||
@@ -624,7 +617,7 @@ class L1CacheManager:
|
||||
|
||||
# ============ Deletion Rules ============
|
||||
|
||||
def can_delete(self, content_hash: str) -> tuple[bool, str]:
|
||||
def can_delete(self, cid: str) -> tuple[bool, str]:
|
||||
"""
|
||||
Check if a cached item can be deleted.
|
||||
|
||||
@@ -632,12 +625,12 @@ class L1CacheManager:
|
||||
(can_delete, reason) tuple
|
||||
"""
|
||||
# Check if pinned (published or input to published)
|
||||
pinned, reason = self.is_pinned(content_hash)
|
||||
pinned, reason = self.is_pinned(cid)
|
||||
if pinned:
|
||||
return False, f"Item is pinned ({reason})"
|
||||
|
||||
# Find node_id for this content
|
||||
node_id = self._get_content_index(content_hash) or content_hash
|
||||
node_id = self._get_content_index(cid) or cid
|
||||
|
||||
# Check if it's an input or output of any activity
|
||||
for activity in self.activity_store.list():
|
||||
@@ -663,34 +656,34 @@ class L1CacheManager:
|
||||
for node_id in activity.all_node_ids:
|
||||
entry = self.cache.get_entry(node_id)
|
||||
if entry:
|
||||
pinned, reason = self.is_pinned(entry.content_hash)
|
||||
pinned, reason = self.is_pinned(entry.cid)
|
||||
if pinned:
|
||||
return False, f"Item {node_id} is pinned ({reason})"
|
||||
|
||||
return True, "OK"
|
||||
|
||||
def delete_by_content_hash(self, content_hash: str) -> tuple[bool, str]:
|
||||
def delete_by_cid(self, cid: str) -> tuple[bool, str]:
|
||||
"""
|
||||
Delete a cached item by content_hash.
|
||||
Delete a cached item by cid.
|
||||
|
||||
Enforces deletion rules.
|
||||
|
||||
Returns:
|
||||
(success, message) tuple
|
||||
"""
|
||||
can_delete, reason = self.can_delete(content_hash)
|
||||
can_delete, reason = self.can_delete(cid)
|
||||
if not can_delete:
|
||||
return False, reason
|
||||
|
||||
# Find and delete
|
||||
node_id = self._get_content_index(content_hash)
|
||||
node_id = self._get_content_index(cid)
|
||||
if node_id:
|
||||
self.cache.remove(node_id)
|
||||
self._del_content_index(content_hash)
|
||||
self._del_content_index(cid)
|
||||
return True, "Deleted"
|
||||
|
||||
# Try legacy
|
||||
legacy_path = self.legacy_dir / content_hash
|
||||
legacy_path = self.legacy_dir / cid
|
||||
if legacy_path.exists():
|
||||
legacy_path.unlink()
|
||||
return True, "Deleted (legacy)"
|
||||
@@ -732,7 +725,7 @@ class L1CacheManager:
|
||||
if activity.output_id:
|
||||
entry = self.cache.get_entry(activity.output_id)
|
||||
if entry:
|
||||
pinned, reason = self.is_pinned(entry.content_hash)
|
||||
pinned, reason = self.is_pinned(entry.cid)
|
||||
if pinned:
|
||||
return False, f"Output is pinned ({reason})"
|
||||
|
||||
@@ -743,9 +736,9 @@ class L1CacheManager:
|
||||
# Remove from cache
|
||||
self.cache.remove(activity.output_id)
|
||||
# Remove from content index (Redis + local)
|
||||
self._del_content_index(entry.content_hash)
|
||||
self._del_content_index(entry.cid)
|
||||
# Delete from legacy dir if exists
|
||||
legacy_path = self.legacy_dir / entry.content_hash
|
||||
legacy_path = self.legacy_dir / entry.cid
|
||||
if legacy_path.exists():
|
||||
legacy_path.unlink()
|
||||
|
||||
@@ -754,8 +747,8 @@ class L1CacheManager:
|
||||
entry = self.cache.get_entry(node_id)
|
||||
if entry:
|
||||
self.cache.remove(node_id)
|
||||
self._del_content_index(entry.content_hash)
|
||||
legacy_path = self.legacy_dir / entry.content_hash
|
||||
self._del_content_index(entry.cid)
|
||||
legacy_path = self.legacy_dir / entry.cid
|
||||
if legacy_path.exists():
|
||||
legacy_path.unlink()
|
||||
|
||||
@@ -777,13 +770,13 @@ class L1CacheManager:
|
||||
|
||||
# ============ L2 Integration ============
|
||||
|
||||
def mark_published(self, content_hash: str):
|
||||
"""Mark a content_hash as published to L2."""
|
||||
self.l2_checker.mark_shared(content_hash)
|
||||
def mark_published(self, cid: str):
|
||||
"""Mark a cid as published to L2."""
|
||||
self.l2_checker.mark_shared(cid)
|
||||
|
||||
def invalidate_shared_cache(self, content_hash: str):
|
||||
def invalidate_shared_cache(self, cid: str):
|
||||
"""Invalidate shared status cache (call if item might be unpublished)."""
|
||||
self.l2_checker.invalidate(content_hash)
|
||||
self.l2_checker.invalidate(cid)
|
||||
|
||||
# ============ Stats ============
|
||||
|
||||
|
||||
Reference in New Issue
Block a user