Squashed 'l1/' content from commit 670aa58

git-subtree-dir: l1 git-subtree-split: 670aa582df99e87fca7c247b949baf452e8c234f
2026-02-24 23:07:19 +00:00
commit 80c94ebea7
225 changed files with 57298 additions and 0 deletions
--- a/tests/test_execute_recipe.py
+++ b/tests/test_execute_recipe.py
@@ -0,0 +1,529 @@
+"""
+Tests for execute_recipe SOURCE node resolution.
+
+These tests verify that SOURCE nodes with :input true are correctly
+resolved from input_hashes at execution time.
+"""
+
+import pytest
+from unittest.mock import MagicMock, patch
+from typing import Dict, Any
+
+
+class MockStep:
+    """Mock ExecutionStep for testing."""
+
+    def __init__(
+        self,
+        step_id: str,
+        node_type: str,
+        config: Dict[str, Any],
+        cache_id: str,
+        input_steps: list = None,
+        level: int = 0,
+    ):
+        self.step_id = step_id
+        self.node_type = node_type
+        self.config = config
+        self.cache_id = cache_id
+        self.input_steps = input_steps or []
+        self.level = level
+        self.name = config.get("name")
+        self.outputs = []
+
+
+class MockPlan:
+    """Mock ExecutionPlanSexp for testing."""
+
+    def __init__(self, steps: list, output_step_id: str, plan_id: str = "test-plan"):
+        self.steps = steps
+        self.output_step_id = output_step_id
+        self.plan_id = plan_id
+
+    def to_string(self, pretty: bool = False) -> str:
+        return "(plan test)"
+
+
+def resolve_source_cid(step_config: Dict[str, Any], input_hashes: Dict[str, str]) -> str:
+    """
+    Resolve CID for a SOURCE node.
+
+    This is the logic that should be in execute_recipe - extracted here for unit testing.
+    """
+    source_cid = step_config.get("cid")
+
+    # If source has :input true, resolve CID from input_hashes
+    if not source_cid and step_config.get("input"):
+        source_name = step_config.get("name", "")
+        # Try various key formats for lookup
+        name_variants = [
+            source_name,
+            source_name.lower().replace(" ", "-"),
+            source_name.lower().replace(" ", "_"),
+            source_name.lower(),
+        ]
+        for variant in name_variants:
+            if variant in input_hashes:
+                source_cid = input_hashes[variant]
+                break
+
+        if not source_cid:
+            raise ValueError(
+                f"SOURCE '{source_name}' not found in input_hashes. "
+                f"Available: {list(input_hashes.keys())}"
+            )
+
+    return source_cid
+
+
+class TestSourceCidResolution:
+    """Tests for SOURCE node CID resolution from input_hashes."""
+
+    def test_source_with_fixed_cid(self):
+        """SOURCE with :cid should use that directly."""
+        config = {"cid": "QmFixedCid123"}
+        input_hashes = {}
+
+        cid = resolve_source_cid(config, input_hashes)
+        assert cid == "QmFixedCid123"
+
+    def test_source_with_input_true_exact_match(self):
+        """SOURCE with :input true should resolve from input_hashes by exact name."""
+        config = {"input": True, "name": "my-video"}
+        input_hashes = {"my-video": "QmInputVideo456"}
+
+        cid = resolve_source_cid(config, input_hashes)
+        assert cid == "QmInputVideo456"
+
+    def test_source_with_input_true_normalized_dash(self):
+        """SOURCE with :input true should resolve from normalized dash format."""
+        config = {"input": True, "name": "Second Video"}
+        input_hashes = {"second-video": "QmSecondVideo789"}
+
+        cid = resolve_source_cid(config, input_hashes)
+        assert cid == "QmSecondVideo789"
+
+    def test_source_with_input_true_normalized_underscore(self):
+        """SOURCE with :input true should resolve from normalized underscore format."""
+        config = {"input": True, "name": "Second Video"}
+        input_hashes = {"second_video": "QmSecondVideoUnderscore"}
+
+        cid = resolve_source_cid(config, input_hashes)
+        assert cid == "QmSecondVideoUnderscore"
+
+    def test_source_with_input_true_lowercase(self):
+        """SOURCE with :input true should resolve from lowercase format."""
+        config = {"input": True, "name": "MyVideo"}
+        input_hashes = {"myvideo": "QmLowercaseVideo"}
+
+        cid = resolve_source_cid(config, input_hashes)
+        assert cid == "QmLowercaseVideo"
+
+    def test_source_with_input_true_missing_raises(self):
+        """SOURCE with :input true should raise if not in input_hashes."""
+        config = {"input": True, "name": "Missing Video"}
+        input_hashes = {"other-video": "QmOther123"}
+
+        with pytest.raises(ValueError) as excinfo:
+            resolve_source_cid(config, input_hashes)
+
+        assert "Missing Video" in str(excinfo.value)
+        assert "not found in input_hashes" in str(excinfo.value)
+        assert "other-video" in str(excinfo.value)  # Shows available keys
+
+    def test_source_without_cid_or_input_returns_none(self):
+        """SOURCE without :cid or :input should return None."""
+        config = {"name": "some-source"}
+        input_hashes = {}
+
+        cid = resolve_source_cid(config, input_hashes)
+        assert cid is None
+
+    def test_source_priority_cid_over_input(self):
+        """SOURCE with both :cid and :input true should use :cid."""
+        config = {"cid": "QmDirectCid", "input": True, "name": "my-video"}
+        input_hashes = {"my-video": "QmInputHashCid"}
+
+        cid = resolve_source_cid(config, input_hashes)
+        assert cid == "QmDirectCid"  # :cid takes priority
+
+
+class TestSourceNameVariants:
+    """Tests for the various input name normalization formats."""
+
+    @pytest.mark.parametrize("source_name,input_key", [
+        ("video", "video"),
+        ("My Video", "my-video"),
+        ("My Video", "my_video"),
+        ("My Video", "My Video"),
+        ("CamelCase", "camelcase"),
+        ("multiple   spaces", "multiple   spaces"),  # Only single replace
+    ])
+    def test_name_variant_matching(self, source_name: str, input_key: str):
+        """Various name formats should match."""
+        config = {"input": True, "name": source_name}
+        input_hashes = {input_key: "QmTestCid"}
+
+        cid = resolve_source_cid(config, input_hashes)
+        assert cid == "QmTestCid"
+
+
+class TestExecuteRecipeIntegration:
+    """Integration tests for execute_recipe with SOURCE nodes."""
+
+    def test_recipe_with_user_input_source(self):
+        """
+        Recipe execution should resolve SOURCE nodes with :input true.
+
+        This is the bug that was causing "No executor for node type: SOURCE".
+        """
+        # Create mock plan with a SOURCE that has :input true
+        source_step = MockStep(
+            step_id="source_1",
+            node_type="SOURCE",
+            config={"input": True, "name": "Second Video", "description": "User input"},
+            cache_id="abc123",
+            level=0,
+        )
+
+        effect_step = MockStep(
+            step_id="effect_1",
+            node_type="EFFECT",
+            config={"effect": "invert"},
+            cache_id="def456",
+            input_steps=["source_1"],
+            level=1,
+        )
+
+        plan = MockPlan(
+            steps=[source_step, effect_step],
+            output_step_id="effect_1",
+        )
+
+        # Input hashes provided by user
+        input_hashes = {
+            "second-video": "QmS4885aRikrjDB4yHPg9yTiPcBFWadZKVfAEvUy7B32zS"
+        }
+
+        # Verify source CID resolution works
+        resolved_cid = resolve_source_cid(source_step.config, input_hashes)
+        assert resolved_cid == "QmS4885aRikrjDB4yHPg9yTiPcBFWadZKVfAEvUy7B32zS"
+
+    def test_recipe_with_fixed_and_user_sources(self):
+        """
+        Recipe with both fixed (asset) and user-input sources.
+
+        This is the dog-invert-concat recipe pattern:
+        - Fixed source: cat asset with known CID
+        - User input: Second Video from input_hashes
+        """
+        fixed_source = MockStep(
+            step_id="source_cat",
+            node_type="SOURCE",
+            config={"cid": "QmCatVideo123", "asset": "cat"},
+            cache_id="cat_cache",
+            level=0,
+        )
+
+        user_source = MockStep(
+            step_id="source_user",
+            node_type="SOURCE",
+            config={"input": True, "name": "Second Video"},
+            cache_id="user_cache",
+            level=0,
+        )
+
+        input_hashes = {
+            "second-video": "QmUserProvidedVideo456"
+        }
+
+        # Fixed source uses its cid
+        fixed_cid = resolve_source_cid(fixed_source.config, input_hashes)
+        assert fixed_cid == "QmCatVideo123"
+
+        # User source resolves from input_hashes
+        user_cid = resolve_source_cid(user_source.config, input_hashes)
+        assert user_cid == "QmUserProvidedVideo456"
+
+
+class TestCompoundNodeHandling:
+    """
+    Tests for COMPOUND node handling.
+
+    COMPOUND nodes are collapsed effect chains that should be executed
+    sequentially through their respective effect executors.
+
+    Bug fixed: "No executor for node type: COMPOUND"
+    """
+
+    def test_compound_node_has_filter_chain(self):
+        """COMPOUND nodes must have a filter_chain config."""
+        step = MockStep(
+            step_id="compound_1",
+            node_type="COMPOUND",
+            config={
+                "filter_chain": [
+                    {"type": "EFFECT", "config": {"effect": "identity"}},
+                    {"type": "EFFECT", "config": {"effect": "dog"}},
+                ],
+                "inputs": ["source_1"],
+            },
+            cache_id="compound_cache",
+            level=1,
+        )
+
+        assert step.node_type == "COMPOUND"
+        assert "filter_chain" in step.config
+        assert len(step.config["filter_chain"]) == 2
+
+    def test_compound_filter_chain_has_effects(self):
+        """COMPOUND filter_chain should contain EFFECT items with effect names."""
+        filter_chain = [
+            {"type": "EFFECT", "config": {"effect": "identity", "cid": "Qm123"}},
+            {"type": "EFFECT", "config": {"effect": "dog", "cid": "Qm456"}},
+        ]
+
+        for item in filter_chain:
+            assert item["type"] == "EFFECT"
+            assert "effect" in item["config"]
+            assert "cid" in item["config"]
+
+    def test_compound_requires_inputs(self):
+        """COMPOUND nodes must have input steps."""
+        step = MockStep(
+            step_id="compound_1",
+            node_type="COMPOUND",
+            config={"filter_chain": [], "inputs": []},
+            cache_id="compound_cache",
+            input_steps=[],
+            level=1,
+        )
+
+        # Empty inputs should be detected as error
+        assert len(step.input_steps) == 0
+        # The execute_recipe should raise ValueError for empty inputs
+
+
+class TestCacheIdLookup:
+    """
+    Tests for cache lookup by code-addressed cache_id.
+
+    Bug fixed: Cache lookups by cache_id (code hash) were failing because
+    only IPFS CID was indexed. Now we also index by node_id when different.
+    """
+
+    def test_cache_id_is_code_addressed(self):
+        """cache_id should be a SHA3-256 hash (64 hex chars), not IPFS CID."""
+        # Code-addressed hash example
+        cache_id = "5702aaec14adaddda9baefa94d5842143749ee19e6bb7c1fa7068dce21f51ed4"
+
+        assert len(cache_id) == 64
+        assert all(c in "0123456789abcdef" for c in cache_id)
+        assert not cache_id.startswith("Qm")  # Not IPFS CID
+
+    def test_ipfs_cid_format(self):
+        """IPFS CIDs start with 'Qm' (v0) or 'bafy' (v1)."""
+        ipfs_cid_v0 = "QmXrj6tSSn1vQXxxEY2Tyoudvt4CeeqR9gGQwSt7WFrhMZ"
+        ipfs_cid_v1 = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
+
+        assert ipfs_cid_v0.startswith("Qm")
+        assert ipfs_cid_v1.startswith("bafy")
+
+    def test_cache_id_differs_from_ipfs_cid(self):
+        """
+        Code-addressed cache_id is computed BEFORE execution.
+        IPFS CID is computed AFTER execution from file content.
+        They will differ for the same logical step.
+        """
+        # Same step can have:
+        cache_id = "5702aaec14adaddda9baefa94d5842143749ee19e6bb7c1fa7068dce21f51ed4"
+        ipfs_cid = "QmXrj6tSSn1vQXxxEY2Tyoudvt4CeeqR9gGQwSt7WFrhMZ"
+
+        assert cache_id != ipfs_cid
+        # Both should be indexable for cache lookups
+
+
+class TestPlanStepTypes:
+    """
+    Tests verifying all node types in S-expression plans are handled.
+
+    These tests document the node types that execute_recipe must handle.
+    """
+
+    def test_source_node_types(self):
+        """SOURCE nodes: fixed asset or user input."""
+        # Fixed asset source
+        fixed = MockStep("s1", "SOURCE", {"cid": "Qm123"}, "cache1")
+        assert fixed.node_type == "SOURCE"
+        assert "cid" in fixed.config
+
+        # User input source
+        user = MockStep("s2", "SOURCE", {"input": True, "name": "video"}, "cache2")
+        assert user.node_type == "SOURCE"
+        assert user.config.get("input") is True
+
+    def test_effect_node_type(self):
+        """EFFECT nodes: single effect application."""
+        step = MockStep(
+            "e1", "EFFECT",
+            {"effect": "invert", "cid": "QmEffect123", "intensity": 1.0},
+            "cache3"
+        )
+        assert step.node_type == "EFFECT"
+        assert "effect" in step.config
+
+    def test_compound_node_type(self):
+        """COMPOUND nodes: collapsed effect chains."""
+        step = MockStep(
+            "c1", "COMPOUND",
+            {"filter_chain": [{"type": "EFFECT", "config": {}}]},
+            "cache4"
+        )
+        assert step.node_type == "COMPOUND"
+        assert "filter_chain" in step.config
+
+    def test_sequence_node_type(self):
+        """SEQUENCE nodes: concatenate multiple clips."""
+        step = MockStep(
+            "seq1", "SEQUENCE",
+            {"transition": {"type": "cut"}},
+            "cache5",
+            input_steps=["clip1", "clip2"]
+        )
+        assert step.node_type == "SEQUENCE"
+
+
+class TestNodeTypeCaseSensitivity:
+    """
+    Tests for node type case handling.
+
+    Bug fixed: S-expression plans use lowercase (source, compound, effect)
+    but code was checking uppercase (SOURCE, COMPOUND, EFFECT).
+    """
+
+    def test_source_lowercase_from_sexp(self):
+        """S-expression plans produce lowercase node types."""
+        # From plan: (source :cid "Qm...")
+        step = MockStep("s1", "source", {"cid": "Qm123"}, "cache1")
+
+        # Code should handle lowercase
+        assert step.node_type.upper() == "SOURCE"
+
+    def test_compound_lowercase_from_sexp(self):
+        """COMPOUND from S-expression is lowercase."""
+        # From plan: (compound :filter_chain ...)
+        step = MockStep("c1", "compound", {"filter_chain": []}, "cache2")
+
+        assert step.node_type.upper() == "COMPOUND"
+
+    def test_effect_lowercase_from_sexp(self):
+        """EFFECT from S-expression is lowercase."""
+        # From plan: (effect :effect "invert" ...)
+        step = MockStep("e1", "effect", {"effect": "invert"}, "cache3")
+
+        assert step.node_type.upper() == "EFFECT"
+
+    def test_sequence_lowercase_from_sexp(self):
+        """SEQUENCE from S-expression is lowercase."""
+        # From plan: (sequence :transition ...)
+        step = MockStep("seq1", "sequence", {"transition": {}}, "cache4")
+
+        assert step.node_type.upper() == "SEQUENCE"
+
+    def test_node_type_comparison_must_be_case_insensitive(self):
+        """
+        Node type comparisons must be case-insensitive.
+
+        This is the actual bug - checking step.node_type == "SOURCE"
+        fails when step.node_type is "source" from S-expression.
+        """
+        sexp_types = ["source", "compound", "effect", "sequence"]
+        code_types = ["SOURCE", "COMPOUND", "EFFECT", "SEQUENCE"]
+
+        for sexp, code in zip(sexp_types, code_types):
+            # Wrong: direct comparison fails
+            assert sexp != code, f"{sexp} should not equal {code}"
+
+            # Right: case-insensitive comparison works
+            assert sexp.upper() == code, f"{sexp}.upper() should equal {code}"
+
+
+class TestExecuteRecipeErrorHandling:
+    """Tests for error handling in execute_recipe."""
+
+    def test_missing_input_hash_error_message(self):
+        """Error should list available input keys when source not found."""
+        config = {"input": True, "name": "Unknown Video"}
+        input_hashes = {"video-a": "Qm1", "video-b": "Qm2"}
+
+        with pytest.raises(ValueError) as excinfo:
+            resolve_source_cid(config, input_hashes)
+
+        error_msg = str(excinfo.value)
+        assert "Unknown Video" in error_msg
+        assert "video-a" in error_msg or "video-b" in error_msg
+
+    def test_source_no_cid_no_input_error(self):
+        """SOURCE without cid or input flag should return None (invalid)."""
+        config = {"name": "broken-source"}  # Missing both cid and input
+        input_hashes = {}
+
+        result = resolve_source_cid(config, input_hashes)
+        assert result is None  # execute_recipe should catch this
+
+
+class TestRecipeOutputRequired:
+    """
+    Tests verifying recipes must produce output to succeed.
+
+    Bug: Recipe was returning success=True with output_cid=None
+    """
+
+    def test_recipe_without_output_should_fail(self):
+        """
+        Recipe execution must fail if no output is produced.
+
+        This catches the bug where execute_recipe returned success=True
+        but output_cid was None.
+        """
+        # Simulate the check that should happen in execute_recipe
+        output_cid = None
+        step_results = {"step1": {"status": "executed", "path": "/tmp/x"}}
+
+        # This is the logic that should be in execute_recipe
+        success = output_cid is not None
+
+        assert success is False, "Recipe with no output_cid should fail"
+
+    def test_recipe_with_output_should_succeed(self):
+        """Recipe with valid output should succeed."""
+        output_cid = "QmOutputCid123"
+
+        success = output_cid is not None
+        assert success is True
+
+    def test_output_step_result_must_have_cid(self):
+        """Output step result must contain cid or cache_id."""
+        # Step result without cid
+        bad_result = {"status": "executed", "path": "/tmp/output.mkv"}
+        output_cid = bad_result.get("cid") or bad_result.get("cache_id")
+        assert output_cid is None, "Should detect missing cid"
+
+        # Step result with cid
+        good_result = {"status": "executed", "path": "/tmp/output.mkv", "cid": "Qm123"}
+        output_cid = good_result.get("cid") or good_result.get("cache_id")
+        assert output_cid == "Qm123"
+
+    def test_output_step_must_exist_in_results(self):
+        """Output step must be present in step_results."""
+        step_results = {
+            "source_1": {"status": "source", "cid": "QmSrc"},
+            "effect_1": {"status": "executed", "cid": "QmEffect"},
+            # Note: output_step "sequence_1" is missing!
+        }
+
+        output_step_id = "sequence_1"
+        output_result = step_results.get(output_step_id, {})
+        output_cid = output_result.get("cid")
+
+        assert output_cid is None, "Missing output step should result in no cid"