mono/l1/tests/test_execute_recipe.py

"""
Tests for execute_recipe SOURCE node resolution.

These tests verify that SOURCE nodes with :input true are correctly
resolved from input_hashes at execution time.
"""

import pytest
from unittest.mock import MagicMock, patch
from typing import Dict, Any


class MockStep:
    """Mock ExecutionStep for testing."""

    def __init__(
        self,
        step_id: str,
        node_type: str,
        config: Dict[str, Any],
        cache_id: str,
        input_steps: list = None,
        level: int = 0,
    ):
        self.step_id = step_id
        self.node_type = node_type
        self.config = config
        self.cache_id = cache_id
        self.input_steps = input_steps or []
        self.level = level
        self.name = config.get("name")
        self.outputs = []


class MockPlan:
    """Mock ExecutionPlanSexp for testing."""

    def __init__(self, steps: list, output_step_id: str, plan_id: str = "test-plan"):
        self.steps = steps
        self.output_step_id = output_step_id
        self.plan_id = plan_id

    def to_string(self, pretty: bool = False) -> str:
        return "(plan test)"


def resolve_source_cid(step_config: Dict[str, Any], input_hashes: Dict[str, str]) -> str:
    """
    Resolve CID for a SOURCE node.

    This is the logic that should be in execute_recipe - extracted here for unit testing.
    """
    source_cid = step_config.get("cid")

    # If source has :input true, resolve CID from input_hashes
    if not source_cid and step_config.get("input"):
        source_name = step_config.get("name", "")
        # Try various key formats for lookup
        name_variants = [
            source_name,
            source_name.lower().replace(" ", "-"),
            source_name.lower().replace(" ", "_"),
            source_name.lower(),
        ]
        for variant in name_variants:
            if variant in input_hashes:
                source_cid = input_hashes[variant]
                break

        if not source_cid:
            raise ValueError(
                f"SOURCE '{source_name}' not found in input_hashes. "
                f"Available: {list(input_hashes.keys())}"
            )

    return source_cid


class TestSourceCidResolution:
    """Tests for SOURCE node CID resolution from input_hashes."""

    def test_source_with_fixed_cid(self):
        """SOURCE with :cid should use that directly."""
        config = {"cid": "QmFixedCid123"}
        input_hashes = {}

        cid = resolve_source_cid(config, input_hashes)
        assert cid == "QmFixedCid123"

    def test_source_with_input_true_exact_match(self):
        """SOURCE with :input true should resolve from input_hashes by exact name."""
        config = {"input": True, "name": "my-video"}
        input_hashes = {"my-video": "QmInputVideo456"}

        cid = resolve_source_cid(config, input_hashes)
        assert cid == "QmInputVideo456"

    def test_source_with_input_true_normalized_dash(self):
        """SOURCE with :input true should resolve from normalized dash format."""
        config = {"input": True, "name": "Second Video"}
        input_hashes = {"second-video": "QmSecondVideo789"}

        cid = resolve_source_cid(config, input_hashes)
        assert cid == "QmSecondVideo789"

    def test_source_with_input_true_normalized_underscore(self):
        """SOURCE with :input true should resolve from normalized underscore format."""
        config = {"input": True, "name": "Second Video"}
        input_hashes = {"second_video": "QmSecondVideoUnderscore"}

        cid = resolve_source_cid(config, input_hashes)
        assert cid == "QmSecondVideoUnderscore"

    def test_source_with_input_true_lowercase(self):
        """SOURCE with :input true should resolve from lowercase format."""
        config = {"input": True, "name": "MyVideo"}
        input_hashes = {"myvideo": "QmLowercaseVideo"}

        cid = resolve_source_cid(config, input_hashes)
        assert cid == "QmLowercaseVideo"

    def test_source_with_input_true_missing_raises(self):
        """SOURCE with :input true should raise if not in input_hashes."""
        config = {"input": True, "name": "Missing Video"}
        input_hashes = {"other-video": "QmOther123"}

        with pytest.raises(ValueError) as excinfo:
            resolve_source_cid(config, input_hashes)

        assert "Missing Video" in str(excinfo.value)
        assert "not found in input_hashes" in str(excinfo.value)
        assert "other-video" in str(excinfo.value)  # Shows available keys

    def test_source_without_cid_or_input_returns_none(self):
        """SOURCE without :cid or :input should return None."""
        config = {"name": "some-source"}
        input_hashes = {}

        cid = resolve_source_cid(config, input_hashes)
        assert cid is None

    def test_source_priority_cid_over_input(self):
        """SOURCE with both :cid and :input true should use :cid."""
        config = {"cid": "QmDirectCid", "input": True, "name": "my-video"}
        input_hashes = {"my-video": "QmInputHashCid"}

        cid = resolve_source_cid(config, input_hashes)
        assert cid == "QmDirectCid"  # :cid takes priority


class TestSourceNameVariants:
    """Tests for the various input name normalization formats."""

    @pytest.mark.parametrize("source_name,input_key", [
        ("video", "video"),
        ("My Video", "my-video"),
        ("My Video", "my_video"),
        ("My Video", "My Video"),
        ("CamelCase", "camelcase"),
        ("multiple   spaces", "multiple   spaces"),  # Only single replace
    ])
    def test_name_variant_matching(self, source_name: str, input_key: str):
        """Various name formats should match."""
        config = {"input": True, "name": source_name}
        input_hashes = {input_key: "QmTestCid"}

        cid = resolve_source_cid(config, input_hashes)
        assert cid == "QmTestCid"


class TestExecuteRecipeIntegration:
    """Integration tests for execute_recipe with SOURCE nodes."""

    def test_recipe_with_user_input_source(self):
        """
        Recipe execution should resolve SOURCE nodes with :input true.

        This is the bug that was causing "No executor for node type: SOURCE".
        """
        # Create mock plan with a SOURCE that has :input true
        source_step = MockStep(
            step_id="source_1",
            node_type="SOURCE",
            config={"input": True, "name": "Second Video", "description": "User input"},
            cache_id="abc123",
            level=0,
        )

        effect_step = MockStep(
            step_id="effect_1",
            node_type="EFFECT",
            config={"effect": "invert"},
            cache_id="def456",
            input_steps=["source_1"],
            level=1,
        )

        plan = MockPlan(
            steps=[source_step, effect_step],
            output_step_id="effect_1",
        )

        # Input hashes provided by user
        input_hashes = {
            "second-video": "QmS4885aRikrjDB4yHPg9yTiPcBFWadZKVfAEvUy7B32zS"
        }

        # Verify source CID resolution works
        resolved_cid = resolve_source_cid(source_step.config, input_hashes)
        assert resolved_cid == "QmS4885aRikrjDB4yHPg9yTiPcBFWadZKVfAEvUy7B32zS"

    def test_recipe_with_fixed_and_user_sources(self):
        """
        Recipe with both fixed (asset) and user-input sources.

        This is the dog-invert-concat recipe pattern:
        - Fixed source: cat asset with known CID
        - User input: Second Video from input_hashes
        """
        fixed_source = MockStep(
            step_id="source_cat",
            node_type="SOURCE",
            config={"cid": "QmCatVideo123", "asset": "cat"},
            cache_id="cat_cache",
            level=0,
        )

        user_source = MockStep(
            step_id="source_user",
            node_type="SOURCE",
            config={"input": True, "name": "Second Video"},
            cache_id="user_cache",
            level=0,
        )

        input_hashes = {
            "second-video": "QmUserProvidedVideo456"
        }

        # Fixed source uses its cid
        fixed_cid = resolve_source_cid(fixed_source.config, input_hashes)
        assert fixed_cid == "QmCatVideo123"

        # User source resolves from input_hashes
        user_cid = resolve_source_cid(user_source.config, input_hashes)
        assert user_cid == "QmUserProvidedVideo456"


class TestCompoundNodeHandling:
    """
    Tests for COMPOUND node handling.

    COMPOUND nodes are collapsed effect chains that should be executed
    sequentially through their respective effect executors.

    Bug fixed: "No executor for node type: COMPOUND"
    """

    def test_compound_node_has_filter_chain(self):
        """COMPOUND nodes must have a filter_chain config."""
        step = MockStep(
            step_id="compound_1",
            node_type="COMPOUND",
            config={
                "filter_chain": [
                    {"type": "EFFECT", "config": {"effect": "identity"}},
                    {"type": "EFFECT", "config": {"effect": "dog"}},
                ],
                "inputs": ["source_1"],
            },
            cache_id="compound_cache",
            level=1,
        )

        assert step.node_type == "COMPOUND"
        assert "filter_chain" in step.config
        assert len(step.config["filter_chain"]) == 2

    def test_compound_filter_chain_has_effects(self):
        """COMPOUND filter_chain should contain EFFECT items with effect names."""
        filter_chain = [
            {"type": "EFFECT", "config": {"effect": "identity", "cid": "Qm123"}},
            {"type": "EFFECT", "config": {"effect": "dog", "cid": "Qm456"}},
        ]

        for item in filter_chain:
            assert item["type"] == "EFFECT"
            assert "effect" in item["config"]
            assert "cid" in item["config"]

    def test_compound_requires_inputs(self):
        """COMPOUND nodes must have input steps."""
        step = MockStep(
            step_id="compound_1",
            node_type="COMPOUND",
            config={"filter_chain": [], "inputs": []},
            cache_id="compound_cache",
            input_steps=[],
            level=1,
        )

        # Empty inputs should be detected as error
        assert len(step.input_steps) == 0
        # The execute_recipe should raise ValueError for empty inputs


class TestCacheIdLookup:
    """
    Tests for cache lookup by code-addressed cache_id.

    Bug fixed: Cache lookups by cache_id (code hash) were failing because
    only IPFS CID was indexed. Now we also index by node_id when different.
    """

    def test_cache_id_is_code_addressed(self):
        """cache_id should be a SHA3-256 hash (64 hex chars), not IPFS CID."""
        # Code-addressed hash example
        cache_id = "5702aaec14adaddda9baefa94d5842143749ee19e6bb7c1fa7068dce21f51ed4"

        assert len(cache_id) == 64
        assert all(c in "0123456789abcdef" for c in cache_id)
        assert not cache_id.startswith("Qm")  # Not IPFS CID

    def test_ipfs_cid_format(self):
        """IPFS CIDs start with 'Qm' (v0) or 'bafy' (v1)."""
        ipfs_cid_v0 = "QmXrj6tSSn1vQXxxEY2Tyoudvt4CeeqR9gGQwSt7WFrhMZ"
        ipfs_cid_v1 = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"

        assert ipfs_cid_v0.startswith("Qm")
        assert ipfs_cid_v1.startswith("bafy")

    def test_cache_id_differs_from_ipfs_cid(self):
        """
        Code-addressed cache_id is computed BEFORE execution.
        IPFS CID is computed AFTER execution from file content.
        They will differ for the same logical step.
        """
        # Same step can have:
        cache_id = "5702aaec14adaddda9baefa94d5842143749ee19e6bb7c1fa7068dce21f51ed4"
        ipfs_cid = "QmXrj6tSSn1vQXxxEY2Tyoudvt4CeeqR9gGQwSt7WFrhMZ"

        assert cache_id != ipfs_cid
        # Both should be indexable for cache lookups


class TestPlanStepTypes:
    """
    Tests verifying all node types in S-expression plans are handled.

    These tests document the node types that execute_recipe must handle.
    """

    def test_source_node_types(self):
        """SOURCE nodes: fixed asset or user input."""
        # Fixed asset source
        fixed = MockStep("s1", "SOURCE", {"cid": "Qm123"}, "cache1")
        assert fixed.node_type == "SOURCE"
        assert "cid" in fixed.config

        # User input source
        user = MockStep("s2", "SOURCE", {"input": True, "name": "video"}, "cache2")
        assert user.node_type == "SOURCE"
        assert user.config.get("input") is True

    def test_effect_node_type(self):
        """EFFECT nodes: single effect application."""
        step = MockStep(
            "e1", "EFFECT",
            {"effect": "invert", "cid": "QmEffect123", "intensity": 1.0},
            "cache3"
        )
        assert step.node_type == "EFFECT"
        assert "effect" in step.config

    def test_compound_node_type(self):
        """COMPOUND nodes: collapsed effect chains."""
        step = MockStep(
            "c1", "COMPOUND",
            {"filter_chain": [{"type": "EFFECT", "config": {}}]},
            "cache4"
        )
        assert step.node_type == "COMPOUND"
        assert "filter_chain" in step.config

    def test_sequence_node_type(self):
        """SEQUENCE nodes: concatenate multiple clips."""
        step = MockStep(
            "seq1", "SEQUENCE",
            {"transition": {"type": "cut"}},
            "cache5",
            input_steps=["clip1", "clip2"]
        )
        assert step.node_type == "SEQUENCE"


class TestNodeTypeCaseSensitivity:
    """
    Tests for node type case handling.

    Bug fixed: S-expression plans use lowercase (source, compound, effect)
    but code was checking uppercase (SOURCE, COMPOUND, EFFECT).
    """

    def test_source_lowercase_from_sexp(self):
        """S-expression plans produce lowercase node types."""
        # From plan: (source :cid "Qm...")
        step = MockStep("s1", "source", {"cid": "Qm123"}, "cache1")

        # Code should handle lowercase
        assert step.node_type.upper() == "SOURCE"

    def test_compound_lowercase_from_sexp(self):
        """COMPOUND from S-expression is lowercase."""
        # From plan: (compound :filter_chain ...)
        step = MockStep("c1", "compound", {"filter_chain": []}, "cache2")

        assert step.node_type.upper() == "COMPOUND"

    def test_effect_lowercase_from_sexp(self):
        """EFFECT from S-expression is lowercase."""
        # From plan: (effect :effect "invert" ...)
        step = MockStep("e1", "effect", {"effect": "invert"}, "cache3")

        assert step.node_type.upper() == "EFFECT"

    def test_sequence_lowercase_from_sexp(self):
        """SEQUENCE from S-expression is lowercase."""
        # From plan: (sequence :transition ...)
        step = MockStep("seq1", "sequence", {"transition": {}}, "cache4")

        assert step.node_type.upper() == "SEQUENCE"

    def test_node_type_comparison_must_be_case_insensitive(self):
        """
        Node type comparisons must be case-insensitive.

        This is the actual bug - checking step.node_type == "SOURCE"
        fails when step.node_type is "source" from S-expression.
        """
        sexp_types = ["source", "compound", "effect", "sequence"]
        code_types = ["SOURCE", "COMPOUND", "EFFECT", "SEQUENCE"]

        for sexp, code in zip(sexp_types, code_types):
            # Wrong: direct comparison fails
            assert sexp != code, f"{sexp} should not equal {code}"

            # Right: case-insensitive comparison works
            assert sexp.upper() == code, f"{sexp}.upper() should equal {code}"


class TestExecuteRecipeErrorHandling:
    """Tests for error handling in execute_recipe."""

    def test_missing_input_hash_error_message(self):
        """Error should list available input keys when source not found."""
        config = {"input": True, "name": "Unknown Video"}
        input_hashes = {"video-a": "Qm1", "video-b": "Qm2"}

        with pytest.raises(ValueError) as excinfo:
            resolve_source_cid(config, input_hashes)

        error_msg = str(excinfo.value)
        assert "Unknown Video" in error_msg
        assert "video-a" in error_msg or "video-b" in error_msg

    def test_source_no_cid_no_input_error(self):
        """SOURCE without cid or input flag should return None (invalid)."""
        config = {"name": "broken-source"}  # Missing both cid and input
        input_hashes = {}

        result = resolve_source_cid(config, input_hashes)
        assert result is None  # execute_recipe should catch this


class TestRecipeOutputRequired:
    """
    Tests verifying recipes must produce output to succeed.

    Bug: Recipe was returning success=True with output_cid=None
    """

    def test_recipe_without_output_should_fail(self):
        """
        Recipe execution must fail if no output is produced.

        This catches the bug where execute_recipe returned success=True
        but output_cid was None.
        """
        # Simulate the check that should happen in execute_recipe
        output_cid = None
        step_results = {"step1": {"status": "executed", "path": "/tmp/x"}}

        # This is the logic that should be in execute_recipe
        success = output_cid is not None

        assert success is False, "Recipe with no output_cid should fail"

    def test_recipe_with_output_should_succeed(self):
        """Recipe with valid output should succeed."""
        output_cid = "QmOutputCid123"

        success = output_cid is not None
        assert success is True

    def test_output_step_result_must_have_cid(self):
        """Output step result must contain cid or cache_id."""
        # Step result without cid
        bad_result = {"status": "executed", "path": "/tmp/output.mkv"}
        output_cid = bad_result.get("cid") or bad_result.get("cache_id")
        assert output_cid is None, "Should detect missing cid"

        # Step result with cid
        good_result = {"status": "executed", "path": "/tmp/output.mkv", "cid": "Qm123"}
        output_cid = good_result.get("cid") or good_result.get("cache_id")
        assert output_cid == "Qm123"

    def test_output_step_must_exist_in_results(self):
        """Output step must be present in step_results."""
        step_results = {
            "source_1": {"status": "source", "cid": "QmSrc"},
            "effect_1": {"status": "executed", "cid": "QmEffect"},
            # Note: output_step "sequence_1" is missing!
        }

        output_step_id = "sequence_1"
        output_result = step_results.get(output_step_id, {})
        output_cid = output_result.get("cid")

        assert output_cid is None, "Missing output step should result in no cid"