# celery/app/services/recipe_service.py

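"""
Recipe Service - business logic for recipe management.

Recipes are S-expressions stored in the content-addressed cache (and IPFS);
YAML recipes already present in the cache can still be read.
The recipe ID is the recipe file's content identifier: the IPFS CID when one
is available, otherwise the cache's content hash.
"""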

import logging
import re
import tempfile
from pathlib import Path
from typing import Optional, List, Dict, Any, Tuple, TYPE_CHECKING

from artdag.sexp import compile_string, parse, serialize, CompileError, ParseError

if TYPE_CHECKING:
    import redis
    from cache_manager import L1CacheManager

from ..types import Recipe, CompiledDAG, VisualizationDAG, VisNode, VisEdge


logger = logging.getLogger(__name__)


class RecipeService:
    """
    Service for managing recipes.

    Recipes are files in the content-addressed cache, looked up by content ID.
    New uploads must be S-expressions; ``get_recipe`` can additionally read
    YAML files found in the cache.
    """

    # Maximum number of ownership links fetched per listing. Paging happens in
    # memory after sorting by name, so recipes beyond this cap are not listed.
    MAX_USER_RECIPES = 1000

    # Captures the quoted name in a streaming recipe header: (stream "name" ...)
    _STREAM_NAME_RE = re.compile(r'\(stream\s+"([^"]+)"')

    def __init__(self, redis: "redis.Redis", cache: "L1CacheManager") -> None:
        # Redis kept for compatibility but not used for recipe storage
        self.redis = redis
        self.cache = cache

    @staticmethod
    def _first_form_line(text: str) -> str:
        """Return the first line that is neither blank nor a ';' comment, stripped ("" if none)."""
        for line in text.split("\n"):
            stripped = line.strip()
            if stripped and not stripped.startswith(";"):
                return stripped
        return ""

    @staticmethod
    def _dag_nodes(recipe_data: Dict[str, Any]) -> Any:
        """Return ``recipe_data["dag"]["nodes"]``, or [] when "dag" is missing or not a dict."""
        dag = recipe_data.get("dag")
        if not isinstance(dag, dict):
            return []
        return dag.get("nodes", [])

    async def get_recipe(self, recipe_id: str) -> Optional[Recipe]:
        """
        Get a recipe by ID (content hash).

        Returns:
            - ``None`` when the cache has no file for ``recipe_id``.
            - ``{"error": ..., "recipe_id": ...}`` when the file cannot be
              decoded or parsed.
            - Otherwise the parsed recipe dict, extended with ``recipe_id``,
              ``format`` ("sexp" or "yaml"), the raw source under ``sexp`` or
              ``yaml``, ``step_count``, and ``ipfs_cid`` when the cache knows one.
        """
        # Get from cache (content-addressed storage)
        logger.info(f"get_recipe: Looking up recipe_id={recipe_id[:16]}...")
        path = self.cache.get_by_cid(recipe_id)
        logger.info(f"get_recipe: cache.get_by_cid returned path={path}")
        if not path or not path.exists():
            logger.warning(f"get_recipe: Recipe {recipe_id[:16]}... not found in cache")
            return None

        # Decode explicitly as UTF-8 so the result does not depend on the
        # host locale; undecodable files are reported like any parse failure.
        try:
            with open(path, encoding="utf-8") as f:
                content = f.read()
        except UnicodeDecodeError as e:
            logger.warning(f"get_recipe: Recipe {recipe_id[:16]}... is not valid UTF-8: {e}")
            return {"error": f"Recipe is not valid UTF-8 text: {e}", "recipe_id": recipe_id}

        # Detect format from the first meaningful line (comments/blank lines skipped)
        head = self._first_form_line(content)

        if head.startswith("(stream"):
            # Streaming recipes have different format - parse manually
            name_match = self._STREAM_NAME_RE.search(content)
            recipe_name = name_match.group(1) if name_match else "streaming"

            recipe_data = {
                "name": recipe_name,
                "sexp": content,
                "format": "sexp",
                "type": "streaming",
                "dag": {"nodes": []},  # Streaming recipes don't have traditional DAG
            }
            logger.info(f"Parsed streaming recipe {recipe_id[:16]}..., name: {recipe_name}")
        elif head.startswith("("):
            # Parse traditional (recipe ...) S-expression
            try:
                compiled = compile_string(content)
                recipe_data = compiled.to_dict()
                recipe_data["sexp"] = content
                recipe_data["format"] = "sexp"
                logger.info(f"Parsed sexp recipe {recipe_id[:16]}..., keys: {list(recipe_data.keys())}")
            except (ParseError, CompileError) as e:
                logger.warning(f"Failed to parse sexp recipe {recipe_id[:16]}...: {e}")
                return {"error": str(e), "recipe_id": recipe_id}
        else:
            # Parse YAML. Imported here so S-expression recipes do not need PyYAML.
            import yaml

            try:
                recipe_data = yaml.safe_load(content)
                if not isinstance(recipe_data, dict):
                    return {"error": "Invalid YAML: expected dictionary", "recipe_id": recipe_id}
                recipe_data["yaml"] = content
                recipe_data["format"] = "yaml"
            except yaml.YAMLError as e:
                return {"error": f"YAML parse error: {e}", "recipe_id": recipe_id}

        # Add the recipe_id to the data for convenience
        recipe_data["recipe_id"] = recipe_id

        # Get IPFS CID if available
        ipfs_cid = self.cache.get_ipfs_cid(recipe_id)
        if ipfs_cid:
            recipe_data["ipfs_cid"] = ipfs_cid

        # Compute step_count from nodes (handle both formats)
        dag_nodes = self._dag_nodes(recipe_data)
        if recipe_data.get("format") == "sexp":
            nodes = dag_nodes
        else:
            # YAML format: nodes might be at top level or under dag
            nodes = recipe_data.get("nodes", dag_nodes)
        recipe_data["step_count"] = len(nodes) if isinstance(nodes, (list, dict)) else 0

        return recipe_data

    async def list_recipes(self, actor_id: Optional[str] = None, offset: int = 0, limit: int = 20) -> List[Recipe]:
        """
        List recipes owned by a user.

        Queries item_types table for user's recipe links, loads each recipe,
        sorts by name and returns the ``[offset, offset + limit)`` page.
        Recipes that are missing from the cache or fail to parse are skipped.
        Returns an empty list when ``actor_id`` is not given.
        """
        if not actor_id:
            logger.warning("list_recipes called without actor_id")
            return []

        import database

        # Get user's recipe CIDs from item_types
        user_items = await database.get_user_items(
            actor_id, item_type="recipe", limit=self.MAX_USER_RECIPES
        )
        recipe_cids = [item["cid"] for item in user_items]
        logger.info(f"Found {len(recipe_cids)} recipe CIDs for user {actor_id}")

        recipes = []
        for cid in recipe_cids:
            recipe = await self.get_recipe(cid)
            if not recipe:
                continue
            if recipe.get("error"):
                logger.warning(f"Recipe {cid[:16]}... has error: {recipe.get('error')}")
                continue
            recipes.append(recipe)

        # Sort by name; coerce so a missing/None/non-string name cannot raise
        recipes.sort(key=lambda r: str(r.get("name") or ""))

        # Negative values would turn the slice into an end-relative window
        start = max(offset, 0)
        page = recipes[start:start + max(limit, 0)]

        # Add friendly names - only for the page actually returned
        from .naming_service import get_naming_service
        naming = get_naming_service()
        for recipe in page:
            recipe_id = recipe.get("recipe_id")
            if recipe_id:
                friendly = await naming.get_by_cid(actor_id, recipe_id)
                if friendly:
                    recipe["friendly_name"] = friendly["friendly_name"]
                    recipe["base_name"] = friendly["base_name"]

        return page

    async def upload_recipe(
        self,
        content: str,
        uploader: str,
        name: Optional[str] = None,
        description: Optional[str] = None,
    ) -> Tuple[Optional[str], Optional[str]]:
        """
        Upload a recipe from S-expression content.

        The recipe is stored in the cache and pinned to IPFS. When ``uploader``
        is given, an ownership entry and a friendly name are recorded as well.

        Returns (recipe_id, error_message); exactly one of the two is None.
        ``recipe_id`` is the IPFS CID when the cache returns one, otherwise the
        cache CID.
        """
        # Validate S-expression
        try:
            compiled = compile_string(content)
        except ParseError as e:
            return None, f"Parse error: {e}"
        except CompileError as e:
            return None, f"Compile error: {e}"

        # Write to temp file for caching
        tmp_path: Optional[Path] = None
        try:
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=".sexp", mode="w", encoding="utf-8"
            ) as tmp:
                # Record the path before writing so a failed write is cleaned up too
                tmp_path = Path(tmp.name)
                tmp.write(content)

            # Store in cache (content-addressed, auto-pins to IPFS)
            logger.info(f"upload_recipe: Storing recipe in cache from {tmp_path}")
            cached, ipfs_cid = self.cache.put(tmp_path, node_type="recipe", move=True)
            recipe_id = ipfs_cid or cached.cid  # Prefer IPFS CID
            logger.info(f"upload_recipe: Stored recipe, cached.cid={cached.cid[:16]}..., ipfs_cid={ipfs_cid[:16] if ipfs_cid else None}, recipe_id={recipe_id[:16]}...")

            # Track ownership in item_types and assign friendly name
            if uploader:
                import database
                display_name = name or compiled.name or "unnamed-recipe"

                # Create item_types entry (ownership link)
                await database.save_item_metadata(
                    cid=recipe_id,
                    actor_id=uploader,
                    item_type="recipe",
                    description=description,
                    filename=f"{display_name}.sexp",
                )

                # Assign friendly name
                from .naming_service import get_naming_service
                naming = get_naming_service()
                await naming.assign_name(
                    cid=recipe_id,
                    actor_id=uploader,
                    item_type="recipe",
                    display_name=display_name,
                )

            return recipe_id, None

        except Exception as e:
            # Boundary handler: callers get the message, the log keeps the traceback
            logger.exception("upload_recipe: failed to store recipe")
            return None, f"Failed to cache recipe: {e}"
        finally:
            # The temp file was created with delete=False. After a successful
            # move it is already gone; after a failure it must not be leaked.
            if tmp_path is not None:
                try:
                    tmp_path.unlink()
                except FileNotFoundError:
                    pass
                except OSError as cleanup_err:
                    logger.warning(f"upload_recipe: could not remove temp file {tmp_path}: {cleanup_err}")

    async def delete_recipe(self, recipe_id: str, actor_id: Optional[str] = None) -> Tuple[bool, Optional[str]]:
        """
        Remove user's ownership link to a recipe.

        This removes the item_types entry linking the user to the recipe.
        The cached file is only deleted if no other users own it.
        Returns (success, error_message); ``(False, "actor_id required")`` when
        no ``actor_id`` is given.
        """
        if not actor_id:
            return False, "actor_id required"

        import database

        # Remove user's ownership link
        try:
            await database.delete_item_type(recipe_id, actor_id, "recipe")

            # Also remove friendly name
            await database.delete_friendly_name(actor_id, recipe_id)

            # Try to garbage collect if no one owns it anymore
            # (delete_cache_item only deletes if no item_types remain)
            await database.delete_cache_item(recipe_id)

            return True, None
        except Exception as e:
            logger.exception(f"delete_recipe: failed for recipe {recipe_id[:16]}...")
            return False, f"Failed to delete: {e}"

    def parse_recipe(self, content: str) -> CompiledDAG:
        """
        Parse recipe S-expression content.

        Returns the compiled recipe as a dict (``compile_string(...).to_dict()``).
        Errors raised by ``compile_string`` propagate to the caller.
        """
        compiled = compile_string(content)
        return compiled.to_dict()

    def build_dag(self, recipe: Recipe) -> VisualizationDAG:
        """
        Build DAG visualization data from recipe.

        Reads ``recipe["dag"]["nodes"]``, which may be a list of node dicts
        (each carrying its own ``id``; compiled S-expression form) or a dict
        mapping node id to node dict. Each node may list ``inputs`` as node ids
        or as dicts with a ``node`` or ``input`` key.

        Returns ``{"nodes": [...], "edges": [...]}`` for Cytoscape.js. A missing
        or malformed ``dag`` yields empty lists.
        """
        vis_nodes: List[VisNode] = []
        edges: List[VisEdge] = []

        dag = recipe.get("dag")
        if not isinstance(dag, dict):
            dag = {}
        dag_nodes = dag.get("nodes", [])
        output_node = dag.get("output")

        # Normalise both supported layouts to (node_id, node_def) pairs
        if isinstance(dag_nodes, list):
            node_items = [(n.get("id"), n) for n in dag_nodes if isinstance(n, dict)]
        elif isinstance(dag_nodes, dict):
            node_items = [
                (node_id, node_def if isinstance(node_def, dict) else {})
                for node_id, node_def in dag_nodes.items()
            ]
        else:
            node_items = []

        for node_id, node_def in node_items:
            vis_nodes.append({
                "data": {
                    "id": node_id,
                    "label": node_id,
                    "nodeType": node_def.get("type", "EFFECT"),
                    "isOutput": node_id == output_node,
                }
            })

            inputs = node_def.get("inputs") or []
            if isinstance(inputs, str):
                # A bare string is one input, not a sequence of characters
                inputs = [inputs]

            for input_ref in inputs:
                if isinstance(input_ref, dict):
                    source = input_ref.get("node") or input_ref.get("input")
                else:
                    source = input_ref

                if source:
                    edges.append({
                        "data": {
                            "source": source,
                            "target": node_id,
                        }
                    })

        return {"nodes": vis_nodes, "edges": edges}