Squashed 'core/' content from commit 4957443
git-subtree-dir: core git-subtree-split: 4957443184ae0eb6323635a90a19acffb3e01d07
This commit is contained in:
20
artdag/registry/__init__.py
Normal file
20
artdag/registry/__init__.py
Normal file
@@ -0,0 +1,20 @@
|
||||
# artdag/registry/__init__.py
|
||||
"""
|
||||
Art DAG Registry.
|
||||
|
||||
The registry is the foundational data structure that maps named assets
|
||||
to their source paths or content-addressed IDs. Assets in the registry
|
||||
can be referenced by DAGs.
|
||||
|
||||
Example:
|
||||
registry = Registry("/path/to/registry")
|
||||
registry.add_from_file("cat", "/path/to/cat.jpg", tags=["animal", "photo"])
|
||||
|
||||
# Later, in a DAG:
|
||||
builder = DAGBuilder()
|
||||
cat = builder.source(registry.get("cat").path)
|
||||
"""
|
||||
|
||||
from .registry import Registry, Asset
|
||||
|
||||
__all__ = ["Registry", "Asset"]
|
||||
294
artdag/registry/registry.py
Normal file
294
artdag/registry/registry.py
Normal file
@@ -0,0 +1,294 @@
|
||||
# artdag/registry/registry.py
|
||||
"""
|
||||
Asset registry for the Art DAG.
|
||||
|
||||
The registry stores named assets with metadata, enabling:
|
||||
- Named references to source files
|
||||
- Tagging and categorization
|
||||
- Content-addressed deduplication
|
||||
- Asset discovery and search
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import shutil
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
def _file_hash(path: Path, algorithm: str = "sha3_256") -> str:
|
||||
"""
|
||||
Compute content hash of a file.
|
||||
|
||||
Uses SHA-3 (Keccak) by default for quantum resistance.
|
||||
SHA-3-256 provides 128-bit security against quantum attacks (Grover's algorithm).
|
||||
|
||||
Args:
|
||||
path: File to hash
|
||||
algorithm: Hash algorithm (sha3_256, sha3_512, sha256, blake2b)
|
||||
|
||||
Returns:
|
||||
Full hex digest (no truncation)
|
||||
"""
|
||||
hasher = hashlib.new(algorithm)
|
||||
with open(path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(65536), b""):
|
||||
hasher.update(chunk)
|
||||
return hasher.hexdigest()
|
||||
|
||||
|
||||
@dataclass
class Asset:
    """
    A registered asset in the Art DAG.

    The cid is the true identifier. URL and local_path are
    locations where the content can be fetched.

    Attributes:
        name: Unique name for the asset
        cid: SHA-3-256 hash - the canonical identifier
        url: Public URL (canonical location)
        local_path: Optional local path (for local execution)
        asset_type: Type of asset (image, video, audio, etc.)
        tags: List of tags for categorization
        metadata: Additional metadata (dimensions, duration, etc.)
        created_at: Timestamp when added to registry
    """

    name: str
    cid: str
    url: Optional[str] = None
    local_path: Optional[Path] = None
    asset_type: str = "unknown"
    tags: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
    created_at: float = field(default_factory=time.time)

    @property
    def path(self) -> Optional[Path]:
        """Backwards compatible alias for ``local_path``."""
        return self.local_path

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize the asset to a plain dict.

        ``url`` and ``local_path`` are only emitted when set; ``local_path``
        is converted to a string.

        Returns:
            A new dict. ``tags`` and ``metadata`` are shallow copies, so
            editing the returned dict does not modify this asset.
        """
        data: Dict[str, Any] = {
            "name": self.name,
            "cid": self.cid,
            "asset_type": self.asset_type,
            # Copy the mutable containers so callers cannot mutate the
            # asset through its serialized form.
            "tags": list(self.tags),
            "metadata": dict(self.metadata),
            "created_at": self.created_at,
        }
        if self.url:
            data["url"] = self.url
        if self.local_path:
            data["local_path"] = str(self.local_path)
        return data

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Asset":
        """
        Build an asset from a dict produced by ``to_dict``.

        Args:
            data: Mapping with at least ``name`` and ``cid``. The legacy key
                ``path`` is accepted as a fallback for ``local_path``.

        Returns:
            A new Asset that shares no mutable containers with ``data``.

        Raises:
            KeyError: If ``name`` or ``cid`` is missing.
        """
        local_path = data.get("local_path") or data.get("path")  # backwards compat
        created_at = data.get("created_at")
        return cls(
            name=data["name"],
            cid=data["cid"],
            url=data.get("url"),
            local_path=Path(local_path) if local_path else None,
            # ``or`` also covers an explicit null stored in the input.
            asset_type=data.get("asset_type") or "unknown",
            tags=list(data.get("tags") or []),
            metadata=dict(data.get("metadata") or {}),
            created_at=time.time() if created_at is None else created_at,
        )
|
||||
|
||||
|
||||
class Registry:
    """
    The Art DAG registry.

    Stores named assets that can be referenced by DAGs. Assets are held in
    an in-memory dict keyed by name; the whole index is rewritten to
    ``registry.json`` after every ``add`` / ``remove``.

    Structure:
        registry_dir/
            registry.json       # Index of all assets
            assets/             # Optional: copied asset files
                <hash>/
                    <filename>
    """

    # Lower-cased file extension -> asset type, used when the caller does
    # not pass an explicit ``asset_type``.
    _TYPE_BY_EXTENSION = {
        ".jpg": "image", ".jpeg": "image", ".png": "image",
        ".gif": "image", ".webp": "image", ".bmp": "image",
        ".mp4": "video", ".mkv": "video", ".avi": "video",
        ".mov": "video", ".webm": "video",
        ".mp3": "audio", ".wav": "audio", ".flac": "audio",
        ".ogg": "audio", ".aac": "audio",
    }

    def __init__(self, registry_dir: Path | str, copy_assets: bool = False):
        """
        Initialize the registry.

        Creates ``registry_dir`` if needed and loads an existing index.

        Args:
            registry_dir: Directory to store registry data
            copy_assets: If True, copy assets into registry (content-addressed).
                NOTE(review): the flag is stored on the instance, but no
                method of this class reads it, so nothing is copied here.
        """
        self.registry_dir = Path(registry_dir)
        self.registry_dir.mkdir(parents=True, exist_ok=True)
        self.copy_assets = copy_assets
        self._assets: Dict[str, Asset] = {}
        self._load()

    def _index_path(self) -> Path:
        """Return the path of the JSON index file."""
        return self.registry_dir / "registry.json"

    def _assets_dir(self) -> Path:
        """Return the directory reserved for copied asset files."""
        return self.registry_dir / "assets"

    def _load(self):
        """Load registry from disk (no-op when no index file exists)."""
        index_path = self._index_path()
        if not index_path.exists():
            return
        with open(index_path, encoding="utf-8") as f:
            data = json.load(f)
        self._assets = {
            name: Asset.from_dict(asset_data)
            for name, asset_data in data.get("assets", {}).items()
        }

    def _save(self):
        """
        Save registry to disk.

        The index is written to a sibling temporary file and then renamed
        over ``registry.json``, so a failure while serializing (for example
        a metadata value JSON cannot encode) leaves the previous index
        intact instead of a truncated file.
        """
        data = {
            "version": "1.0",
            "assets": {name: asset.to_dict() for name, asset in self._assets.items()},
        }
        index_path = self._index_path()
        tmp_path = index_path.with_name(index_path.name + ".tmp")
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
            tmp_path.replace(index_path)
        finally:
            # After a successful rename the temp file is gone; this only
            # cleans up the partial file left behind by a failed write.
            tmp_path.unlink(missing_ok=True)

    @classmethod
    def _detect_type(cls, url: Optional[str], local_path: Optional[Path | str]) -> str:
        """Guess the asset type from the URL extension, then the local path extension."""
        candidates = []
        if url:
            # Drop the query string and fragment before looking at the suffix.
            candidates.append(url.split("?", 1)[0].split("#", 1)[0])
        if local_path:
            candidates.append(local_path)
        for candidate in candidates:
            detected = cls._TYPE_BY_EXTENSION.get(Path(candidate).suffix.lower())
            if detected is not None:
                return detected
        return "unknown"

    def add(
        self,
        name: str,
        cid: str,
        url: Optional[str] = None,
        local_path: Optional[Path | str] = None,
        asset_type: Optional[str] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Asset:
        """
        Add an asset to the registry.

        An existing entry with the same ``name`` is replaced. The index is
        saved to disk before returning.

        Args:
            name: Unique name for the asset
            cid: SHA-3-256 hash of the content (the canonical identifier)
            url: Public URL where the asset can be fetched
            local_path: Optional local path (for local execution); stored
                resolved to an absolute path
            asset_type: Type of asset (image, video, audio, etc.). When
                None it is detected from the URL extension, falling back
                to the local path extension, else "unknown".
            tags: List of tags for categorization
            metadata: Additional metadata

        Returns:
            The created Asset
        """
        if asset_type is None:
            asset_type = self._detect_type(url, local_path)

        asset = Asset(
            name=name,
            cid=cid,
            url=url,
            local_path=Path(local_path).resolve() if local_path else None,
            asset_type=asset_type,
            # Copy so later changes to the caller's containers do not
            # silently alter the registered asset.
            tags=list(tags) if tags else [],
            metadata=dict(metadata) if metadata else {},
        )

        self._assets[name] = asset
        self._save()
        return asset

    def add_from_file(
        self,
        name: str,
        path: Path | str,
        url: Optional[str] = None,
        asset_type: Optional[str] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Asset:
        """
        Add an asset from a local file (computes hash automatically).

        Args:
            name: Unique name for the asset
            path: Path to the source file
            url: Optional public URL
            asset_type: Type of asset (auto-detected if not provided)
            tags: List of tags for categorization
            metadata: Additional metadata

        Returns:
            The created Asset

        Raises:
            FileNotFoundError: If ``path`` does not exist.
        """
        path = Path(path)
        if not path.exists():
            raise FileNotFoundError(f"Asset file not found: {path}")

        cid = _file_hash(path)

        return self.add(
            name=name,
            cid=cid,
            url=url,
            local_path=path,
            asset_type=asset_type,
            tags=tags,
            metadata=metadata,
        )

    def get(self, name: str) -> Optional[Asset]:
        """Get an asset by name, or None if it is not registered."""
        return self._assets.get(name)

    def remove(self, name: str) -> bool:
        """Remove an asset from the registry; return False if it was absent."""
        if name not in self._assets:
            return False
        del self._assets[name]
        self._save()
        return True

    def list(self) -> List[Asset]:
        """List all assets."""
        return [*self._assets.values()]

    def find_by_tag(self, tag: str) -> List[Asset]:
        """Find assets with a specific tag."""
        return [a for a in self._assets.values() if tag in a.tags]

    def find_by_type(self, asset_type: str) -> List[Asset]:
        """Find assets of a specific type."""
        return [a for a in self._assets.values() if a.asset_type == asset_type]

    def find_by_hash(self, cid: str) -> Optional[Asset]:
        """Find the first asset whose content hash equals ``cid``, or None."""
        return next((a for a in self._assets.values() if a.cid == cid), None)

    def __contains__(self, name: str) -> bool:
        return name in self._assets

    def __len__(self) -> int:
        return len(self._assets)

    def __iter__(self):
        return iter(self._assets.values())
|
||||
Reference in New Issue
Block a user