Files
celery/tests/test_cache_manager.py
gilesb 92d26b2b72 Rename content_hash/output_hash to cid throughout
Refactor to use IPFS CID as the primary content identifier:
- Update database schema: content_hash -> cid, output_hash -> output_cid
- Update all services, routers, and tasks to use cid terminology
- Update HTML templates to display CID instead of hash
- Update cache_manager parameter names
- Update README documentation

This completes the transition to CID-only content addressing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 08:02:44 +00:00

398 lines
14 KiB
Python

# tests/test_cache_manager.py
"""Tests for the L1 cache manager."""
import tempfile
import time
from pathlib import Path
from unittest.mock import Mock, patch
import pytest
from cache_manager import (
L1CacheManager,
L2SharedChecker,
CachedFile,
file_hash,
)
@pytest.fixture
def temp_dir():
    """Yield a fresh scratch directory as a ``Path``; it is removed on teardown."""
    scratch = tempfile.TemporaryDirectory()
    try:
        yield Path(scratch.name)
    finally:
        # Same cleanup that leaving a ``with`` block would perform.
        scratch.cleanup()
@pytest.fixture
def mock_l2():
    """Replace ``cache_manager.requests`` with a mock whose ``get`` answers 404.

    The mock is yielded so individual tests can override ``get.return_value``
    or ``get.side_effect``; the patch is undone on teardown.
    """
    patcher = patch("cache_manager.requests")
    fake_requests = patcher.start()
    try:
        not_found = Mock(status_code=404)
        fake_requests.get.return_value = not_found
        yield fake_requests
    finally:
        patcher.stop()
@pytest.fixture
def manager(temp_dir, mock_l2):
    """Build an ``L1CacheManager`` rooted in a ``cache`` subfolder of ``temp_dir``.

    ``mock_l2`` is not used in the body; it is requested so the ``requests``
    patch is active for the lifetime of the manager.
    """
    cache_root = temp_dir / "cache"
    l2_url = "http://mock-l2:8200"
    return L1CacheManager(cache_dir=cache_root, l2_server=l2_url)
def create_test_file(path: Path, content: str = "test content") -> Path:
    """Write *content* to *path* as UTF-8 text and return the path.

    Args:
        path: Destination file; missing parent directories are created.
        content: Text to write. An existing file at *path* is overwritten.

    Returns:
        The same *path* that was passed in, for convenient chaining.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # Pin the encoding so the bytes on disk (and any hash computed from them)
    # do not depend on the platform's locale default.
    path.write_text(content, encoding="utf-8")
    return path
class TestFileHash:
    """Checks on the ``file_hash`` helper."""

    def test_consistent_hash(self, temp_dir):
        """Two files holding identical text hash to the same value."""
        first = create_test_file(temp_dir / "f1.txt", "hello")
        second = create_test_file(temp_dir / "f2.txt", "hello")

        first_digest = file_hash(first)
        second_digest = file_hash(second)

        assert first_digest == second_digest

    def test_different_content_different_hash(self, temp_dir):
        """Files holding different text hash to different values."""
        first = create_test_file(temp_dir / "f1.txt", "hello")
        second = create_test_file(temp_dir / "f2.txt", "world")

        first_digest = file_hash(first)
        second_digest = file_hash(second)

        assert first_digest != second_digest

    def test_sha3_256_length(self, temp_dir):
        """The hash is 64 characters long (hex length of a SHA3-256 digest)."""
        sample = create_test_file(temp_dir / "f.txt", "test")

        digest = file_hash(sample)

        assert len(digest) == 64
class TestL2SharedChecker:
    """Exercise ``L2SharedChecker`` against the patched ``requests`` module."""

    # Values shared by every test in this class.
    SERVER = "http://mock:8200"
    CID = "abc123"

    def test_not_shared_returns_false(self, mock_l2):
        """A 404 response is reported as not shared."""
        checker = L2SharedChecker(self.SERVER)
        mock_l2.get.return_value = Mock(status_code=404)

        shared = checker.is_shared(self.CID)

        assert shared is False

    def test_shared_returns_true(self, mock_l2):
        """A 200 response is reported as shared."""
        checker = L2SharedChecker(self.SERVER)
        mock_l2.get.return_value = Mock(status_code=200)

        shared = checker.is_shared(self.CID)

        assert shared is True

    def test_caches_result(self, mock_l2):
        """Asking twice for the same CID triggers a single request."""
        checker = L2SharedChecker(self.SERVER, cache_ttl=60)
        mock_l2.get.return_value = Mock(status_code=200)

        for _ in range(2):
            checker.is_shared(self.CID)

        # The second lookup must be answered without another request.
        mock_l2.get.assert_called_once()

    def test_mark_shared(self, mock_l2):
        """``mark_shared`` makes ``is_shared`` true without any request."""
        checker = L2SharedChecker(self.SERVER)

        checker.mark_shared(self.CID)
        shared = checker.is_shared(self.CID)

        assert shared is True
        mock_l2.get.assert_not_called()

    def test_invalidate(self, mock_l2):
        """After ``invalidate`` the next lookup reflects the new server answer."""
        checker = L2SharedChecker(self.SERVER)
        mock_l2.get.return_value = Mock(status_code=200)
        checker.is_shared(self.CID)

        checker.invalidate(self.CID)
        mock_l2.get.return_value = Mock(status_code=404)

        assert checker.is_shared(self.CID) is False

    def test_error_returns_true(self, mock_l2):
        """A failing request is treated as shared, guarding against deletion."""
        checker = L2SharedChecker(self.SERVER)
        mock_l2.get.side_effect = Exception("Network error")

        shared = checker.is_shared(self.CID)

        # Erring on the side of "shared" prevents accidental deletion.
        assert shared is True
class TestL1CacheManagerStorage:
    """Storing files in the cache and looking them up again."""

    def test_put_and_get_by_cid(self, manager, temp_dir):
        """A stored file can be fetched back through its CID."""
        source = create_test_file(temp_dir / "input.txt", "hello world")
        entry = manager.put(source, node_type="test")

        fetched = manager.get_by_cid(entry.cid)

        assert fetched is not None
        assert fetched.read_text() == "hello world"

    def test_put_with_custom_node_id(self, manager, temp_dir):
        """A caller-supplied ``node_id`` is kept and usable for lookup."""
        source = create_test_file(temp_dir / "input.txt", "content")
        node_id = "custom-node-123"

        entry = manager.put(source, node_id=node_id, node_type="test")

        assert entry.node_id == node_id
        assert manager.get_by_node_id(node_id) is not None

    def test_has_content(self, manager, temp_dir):
        """``has_content`` is True for a stored CID and False for an unknown one."""
        source = create_test_file(temp_dir / "input.txt", "data")
        entry = manager.put(source, node_type="test")

        known = manager.has_content(entry.cid)
        unknown = manager.has_content("nonexistent")

        assert known is True
        assert unknown is False

    def test_list_all(self, manager, temp_dir):
        """``list_all`` reports every stored file."""
        specs = (("f1.txt", "one"), ("f2.txt", "two"))
        sources = [create_test_file(temp_dir / name, text) for name, text in specs]
        for source in sources:
            manager.put(source, node_type="test")

        listing = manager.list_all()

        assert len(listing) == 2

    def test_deduplication(self, manager, temp_dir):
        """Two files with identical content share one CID and one entry."""
        sources = [
            create_test_file(temp_dir / name, "identical")
            for name in ("f1.txt", "f2.txt")
        ]
        first, second = [manager.put(source, node_type="test") for source in sources]

        assert first.cid == second.cid
        assert len(manager.list_all()) == 1
class TestL1CacheManagerActivities:
    """Recording activities and querying them back."""

    def test_record_simple_activity(self, manager, temp_dir):
        """A recorded activity carries the run id, input CID and output CID."""
        in_path = create_test_file(temp_dir / "input.txt", "input")
        out_path = create_test_file(temp_dir / "output.txt", "output")
        source = manager.put(in_path, node_type="source")
        result = manager.put(out_path, node_type="effect")

        recorded = manager.record_simple_activity(
            input_hashes=[source.cid],
            output_cid=result.cid,
            run_id="run-001",
        )

        assert recorded.activity_id == "run-001"
        assert source.cid in recorded.input_ids
        assert recorded.output_id == result.cid

    def test_list_activities(self, manager, temp_dir):
        """Every recorded activity shows up in ``list_activities``."""
        for idx in range(3):
            in_path = create_test_file(temp_dir / f"in{idx}.txt", f"input{idx}")
            out_path = create_test_file(temp_dir / f"out{idx}.txt", f"output{idx}")
            source = manager.put(in_path, node_type="source")
            result = manager.put(out_path, node_type="effect")
            manager.record_simple_activity([source.cid], result.cid)

        assert len(manager.list_activities()) == 3

    def test_find_activities_by_inputs(self, manager, temp_dir):
        """Activities that share an input are all found by that input's CID."""
        shared_path = create_test_file(temp_dir / "shared_input.txt", "shared")
        shared = manager.put(shared_path, node_type="source")

        # Two distinct outputs, each linked to the same input by its own run.
        out_paths = [
            create_test_file(temp_dir / f"out{n}.txt", f"output{n}") for n in (1, 2)
        ]
        out_entries = [manager.put(path, node_type="effect") for path in out_paths]
        for run_id, entry in zip(("run1", "run2"), out_entries):
            manager.record_simple_activity([shared.cid], entry.cid, run_id)

        matches = manager.find_activities_by_inputs([shared.cid])

        assert len(matches) == 2
class TestL1CacheManagerDeletionRules:
    """Tests for deletion rules enforcement.

    The ``reason``/``msg`` strings returned by the manager are attached to the
    boolean assertions as failure messages so a failing run explains itself.
    """

    @staticmethod
    def _record_activity(manager, temp_dir):
        """Cache one input and one output file and link them in an activity.

        Returns:
            ``(input_cached, output_cached)`` as returned by ``manager.put``.
        """
        input_file = create_test_file(temp_dir / "input.txt", "input")
        output_file = create_test_file(temp_dir / "output.txt", "output")
        input_cached = manager.put(input_file, node_type="source")
        output_cached = manager.put(output_file, node_type="effect")
        manager.record_simple_activity([input_cached.cid], output_cached.cid)
        return input_cached, output_cached

    def test_can_delete_orphaned_item(self, manager, temp_dir):
        """An item that is in no activity and not pinned can be deleted."""
        test_file = create_test_file(temp_dir / "orphan.txt", "orphan")
        cached = manager.put(test_file, node_type="test")

        can_delete, reason = manager.can_delete(cached.cid)

        assert can_delete is True, reason

    def test_cannot_delete_activity_input(self, manager, temp_dir):
        """Activity inputs cannot be deleted."""
        input_cached, _ = self._record_activity(manager, temp_dir)

        can_delete, reason = manager.can_delete(input_cached.cid)

        assert can_delete is False
        assert "input" in reason.lower()

    def test_cannot_delete_activity_output(self, manager, temp_dir):
        """Activity outputs cannot be deleted."""
        _, output_cached = self._record_activity(manager, temp_dir)

        can_delete, reason = manager.can_delete(output_cached.cid)

        assert can_delete is False
        assert "output" in reason.lower()

    def test_cannot_delete_pinned_item(self, manager, temp_dir):
        """Pinned items cannot be deleted."""
        test_file = create_test_file(temp_dir / "shared.txt", "shared")
        cached = manager.put(test_file, node_type="test")
        # Mark as pinned (published)
        manager.pin(cached.cid, reason="published")

        can_delete, reason = manager.can_delete(cached.cid)

        assert can_delete is False
        # Case-insensitive, matching the input/output reason checks above.
        assert "pinned" in reason.lower()

    def test_delete_orphaned_item(self, manager, temp_dir):
        """Deleting an orphaned item succeeds and removes its content."""
        test_file = create_test_file(temp_dir / "orphan.txt", "orphan")
        cached = manager.put(test_file, node_type="test")

        success, msg = manager.delete_by_cid(cached.cid)

        assert success is True, msg
        assert manager.has_content(cached.cid) is False

    def test_delete_protected_item_fails(self, manager, temp_dir):
        """Deleting an activity input is refused and the content stays cached."""
        input_cached, _ = self._record_activity(manager, temp_dir)

        success, msg = manager.delete_by_cid(input_cached.cid)

        assert success is False, msg
        assert manager.has_content(input_cached.cid) is True
class TestL1CacheManagerActivityDiscard:
    """Tests for activity discard functionality.

    The ``reason``/``msg`` strings returned by the manager are attached to the
    boolean assertions as failure messages so a failing run explains itself.
    """

    @staticmethod
    def _record_run(manager, temp_dir, run_id="run-001"):
        """Cache one input and one output file and record them as *run_id*.

        Returns:
            ``(input_cached, output_cached)`` as returned by ``manager.put``.
        """
        input_file = create_test_file(temp_dir / "input.txt", "input")
        output_file = create_test_file(temp_dir / "output.txt", "output")
        input_cached = manager.put(input_file, node_type="source")
        output_cached = manager.put(output_file, node_type="effect")
        manager.record_simple_activity(
            [input_cached.cid],
            output_cached.cid,
            run_id,
        )
        return input_cached, output_cached

    def test_can_discard_unshared_activity(self, manager, temp_dir):
        """An activity whose items were never pinned can be discarded."""
        self._record_run(manager, temp_dir)

        can_discard, reason = manager.can_discard_activity("run-001")

        assert can_discard is True, reason

    def test_cannot_discard_activity_with_pinned_output(self, manager, temp_dir):
        """Activities with pinned outputs cannot be discarded."""
        _, output_cached = self._record_run(manager, temp_dir)
        # Mark output as pinned (published)
        manager.pin(output_cached.cid, reason="published")

        can_discard, reason = manager.can_discard_activity("run-001")

        assert can_discard is False
        # Case-insensitive so a capitalised reason does not break the test.
        assert "pinned" in reason.lower()

    def test_discard_activity_cleans_up(self, manager, temp_dir):
        """Discarding an activity succeeds and removes its record."""
        self._record_run(manager, temp_dir)

        success, msg = manager.discard_activity("run-001")

        assert success is True, msg
        # NOTE(review): only removal of the activity record is verified here;
        # whether the input/output files are also purged is not asserted.
        assert manager.get_activity("run-001") is None
class TestL1CacheManagerStats:
    """Checks on the summary returned by ``get_stats``."""

    def test_get_stats(self, manager, temp_dir):
        """Stats count both stored files, report a size, and list activities."""
        specs = (("f1.txt", "content1"), ("f2.txt", "content2"))
        sources = [create_test_file(temp_dir / name, text) for name, text in specs]
        for source in sources:
            manager.put(source, node_type="test")

        report = manager.get_stats()

        assert report["total_entries"] == 2
        assert report["total_size_bytes"] > 0
        assert "activities" in report