295 lines
8.8 KiB
Python
295 lines
8.8 KiB
Python
# primitive/registry/registry.py
|
|
"""
|
|
Asset registry for the Art DAG.
|
|
|
|
The registry stores named assets with metadata, enabling:
|
|
- Named references to source files
|
|
- Tagging and categorization
|
|
- Content-addressed deduplication
|
|
- Asset discovery and search
|
|
"""
|
|
|
|
import hashlib
|
|
import json
|
|
import shutil
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
def _file_hash(path: Path, algorithm: str = "sha3_256") -> str:
|
|
"""
|
|
Compute content hash of a file.
|
|
|
|
Uses SHA-3 (Keccak) by default for quantum resistance.
|
|
SHA-3-256 provides 128-bit security against quantum attacks (Grover's algorithm).
|
|
|
|
Args:
|
|
path: File to hash
|
|
algorithm: Hash algorithm (sha3_256, sha3_512, sha256, blake2b)
|
|
|
|
Returns:
|
|
Full hex digest (no truncation)
|
|
"""
|
|
hasher = hashlib.new(algorithm)
|
|
with open(path, "rb") as f:
|
|
for chunk in iter(lambda: f.read(65536), b""):
|
|
hasher.update(chunk)
|
|
return hasher.hexdigest()
|
|
|
|
|
|
@dataclass
|
|
class Asset:
|
|
"""
|
|
A registered asset in the Art DAG.
|
|
|
|
The cid is the true identifier. URL and local_path are
|
|
locations where the content can be fetched.
|
|
|
|
Attributes:
|
|
name: Unique name for the asset
|
|
cid: SHA-3-256 hash - the canonical identifier
|
|
url: Public URL (canonical location)
|
|
local_path: Optional local path (for local execution)
|
|
asset_type: Type of asset (image, video, audio, etc.)
|
|
tags: List of tags for categorization
|
|
metadata: Additional metadata (dimensions, duration, etc.)
|
|
created_at: Timestamp when added to registry
|
|
"""
|
|
name: str
|
|
cid: str
|
|
url: Optional[str] = None
|
|
local_path: Optional[Path] = None
|
|
asset_type: str = "unknown"
|
|
tags: List[str] = field(default_factory=list)
|
|
metadata: Dict[str, Any] = field(default_factory=dict)
|
|
created_at: float = field(default_factory=time.time)
|
|
|
|
@property
|
|
def path(self) -> Optional[Path]:
|
|
"""Backwards compatible path property."""
|
|
return self.local_path
|
|
|
|
def to_dict(self) -> Dict[str, Any]:
|
|
data = {
|
|
"name": self.name,
|
|
"cid": self.cid,
|
|
"asset_type": self.asset_type,
|
|
"tags": self.tags,
|
|
"metadata": self.metadata,
|
|
"created_at": self.created_at,
|
|
}
|
|
if self.url:
|
|
data["url"] = self.url
|
|
if self.local_path:
|
|
data["local_path"] = str(self.local_path)
|
|
return data
|
|
|
|
@classmethod
|
|
def from_dict(cls, data: Dict[str, Any]) -> "Asset":
|
|
local_path = data.get("local_path") or data.get("path") # backwards compat
|
|
return cls(
|
|
name=data["name"],
|
|
cid=data["cid"],
|
|
url=data.get("url"),
|
|
local_path=Path(local_path) if local_path else None,
|
|
asset_type=data.get("asset_type", "unknown"),
|
|
tags=data.get("tags", []),
|
|
metadata=data.get("metadata", {}),
|
|
created_at=data.get("created_at", time.time()),
|
|
)
|
|
|
|
|
|
class Registry:
|
|
"""
|
|
The Art DAG registry.
|
|
|
|
Stores named assets that can be referenced by DAGs.
|
|
|
|
Structure:
|
|
registry_dir/
|
|
registry.json # Index of all assets
|
|
assets/ # Optional: copied asset files
|
|
<hash>/
|
|
<filename>
|
|
"""
|
|
|
|
def __init__(self, registry_dir: Path | str, copy_assets: bool = False):
|
|
"""
|
|
Initialize the registry.
|
|
|
|
Args:
|
|
registry_dir: Directory to store registry data
|
|
copy_assets: If True, copy assets into registry (content-addressed)
|
|
"""
|
|
self.registry_dir = Path(registry_dir)
|
|
self.registry_dir.mkdir(parents=True, exist_ok=True)
|
|
self.copy_assets = copy_assets
|
|
self._assets: Dict[str, Asset] = {}
|
|
self._load()
|
|
|
|
def _index_path(self) -> Path:
|
|
return self.registry_dir / "registry.json"
|
|
|
|
def _assets_dir(self) -> Path:
|
|
return self.registry_dir / "assets"
|
|
|
|
def _load(self):
|
|
"""Load registry from disk."""
|
|
index_path = self._index_path()
|
|
if index_path.exists():
|
|
with open(index_path) as f:
|
|
data = json.load(f)
|
|
self._assets = {
|
|
name: Asset.from_dict(asset_data)
|
|
for name, asset_data in data.get("assets", {}).items()
|
|
}
|
|
|
|
def _save(self):
|
|
"""Save registry to disk."""
|
|
data = {
|
|
"version": "1.0",
|
|
"assets": {name: asset.to_dict() for name, asset in self._assets.items()},
|
|
}
|
|
with open(self._index_path(), "w") as f:
|
|
json.dump(data, f, indent=2)
|
|
|
|
def add(
|
|
self,
|
|
name: str,
|
|
cid: str,
|
|
url: str = None,
|
|
local_path: Path | str = None,
|
|
asset_type: str = None,
|
|
tags: List[str] = None,
|
|
metadata: Dict[str, Any] = None,
|
|
) -> Asset:
|
|
"""
|
|
Add an asset to the registry.
|
|
|
|
Args:
|
|
name: Unique name for the asset
|
|
cid: SHA-3-256 hash of the content (the canonical identifier)
|
|
url: Public URL where the asset can be fetched
|
|
local_path: Optional local path (for local execution)
|
|
asset_type: Type of asset (image, video, audio, etc.)
|
|
tags: List of tags for categorization
|
|
metadata: Additional metadata
|
|
|
|
Returns:
|
|
The created Asset
|
|
"""
|
|
# Auto-detect asset type from URL or path extension
|
|
if asset_type is None:
|
|
ext = None
|
|
if url:
|
|
ext = Path(url.split("?")[0]).suffix.lower()
|
|
elif local_path:
|
|
ext = Path(local_path).suffix.lower()
|
|
if ext:
|
|
type_map = {
|
|
".jpg": "image", ".jpeg": "image", ".png": "image",
|
|
".gif": "image", ".webp": "image", ".bmp": "image",
|
|
".mp4": "video", ".mkv": "video", ".avi": "video",
|
|
".mov": "video", ".webm": "video",
|
|
".mp3": "audio", ".wav": "audio", ".flac": "audio",
|
|
".ogg": "audio", ".aac": "audio",
|
|
}
|
|
asset_type = type_map.get(ext, "unknown")
|
|
else:
|
|
asset_type = "unknown"
|
|
|
|
asset = Asset(
|
|
name=name,
|
|
cid=cid,
|
|
url=url,
|
|
local_path=Path(local_path).resolve() if local_path else None,
|
|
asset_type=asset_type,
|
|
tags=tags or [],
|
|
metadata=metadata or {},
|
|
)
|
|
|
|
self._assets[name] = asset
|
|
self._save()
|
|
return asset
|
|
|
|
def add_from_file(
|
|
self,
|
|
name: str,
|
|
path: Path | str,
|
|
url: str = None,
|
|
asset_type: str = None,
|
|
tags: List[str] = None,
|
|
metadata: Dict[str, Any] = None,
|
|
) -> Asset:
|
|
"""
|
|
Add an asset from a local file (computes hash automatically).
|
|
|
|
Args:
|
|
name: Unique name for the asset
|
|
path: Path to the source file
|
|
url: Optional public URL
|
|
asset_type: Type of asset (auto-detected if not provided)
|
|
tags: List of tags for categorization
|
|
metadata: Additional metadata
|
|
|
|
Returns:
|
|
The created Asset
|
|
"""
|
|
path = Path(path)
|
|
if not path.exists():
|
|
raise FileNotFoundError(f"Asset file not found: {path}")
|
|
|
|
cid = _file_hash(path)
|
|
|
|
return self.add(
|
|
name=name,
|
|
cid=cid,
|
|
url=url,
|
|
local_path=path,
|
|
asset_type=asset_type,
|
|
tags=tags,
|
|
metadata=metadata,
|
|
)
|
|
|
|
def get(self, name: str) -> Optional[Asset]:
|
|
"""Get an asset by name."""
|
|
return self._assets.get(name)
|
|
|
|
def remove(self, name: str) -> bool:
|
|
"""Remove an asset from the registry."""
|
|
if name not in self._assets:
|
|
return False
|
|
del self._assets[name]
|
|
self._save()
|
|
return True
|
|
|
|
def list(self) -> List[Asset]:
|
|
"""List all assets."""
|
|
return list(self._assets.values())
|
|
|
|
def find_by_tag(self, tag: str) -> List[Asset]:
|
|
"""Find assets with a specific tag."""
|
|
return [a for a in self._assets.values() if tag in a.tags]
|
|
|
|
def find_by_type(self, asset_type: str) -> List[Asset]:
|
|
"""Find assets of a specific type."""
|
|
return [a for a in self._assets.values() if a.asset_type == asset_type]
|
|
|
|
def find_by_hash(self, cid: str) -> Optional[Asset]:
|
|
"""Find an asset by content hash."""
|
|
for asset in self._assets.values():
|
|
if asset.cid == cid:
|
|
return asset
|
|
return None
|
|
|
|
def __contains__(self, name: str) -> bool:
|
|
return name in self._assets
|
|
|
|
def __len__(self) -> int:
|
|
return len(self._assets)
|
|
|
|
def __iter__(self):
|
|
return iter(self._assets.values())
|