diff --git a/docs/sexp-architecture-plan.md b/docs/sexp-architecture-plan.md index 28c0e27..b331ba3 100644 --- a/docs/sexp-architecture-plan.md +++ b/docs/sexp-architecture-plan.md @@ -622,6 +622,38 @@ Each phase is independently deployable. The end state: a platform where the appl **Source material ported from:** `artdag/core/artdag/sexp/parser.py` and `evaluator.py`. Stripped DAG-specific types (Binding), replaced Lambda dataclass with callable closure, added defcomp/Component, added web-oriented string primitives, added &key/&rest support in parser. +### Phase 2: HTML Renderer — COMPLETE + +**Branch:** `sexpression` + +**Delivered** (`shared/sexp/html.py`): +- HSX-style renderer: s-expression AST → HTML string +- ~100 HTML tags recognised (sections, headings, grouping, text, embedded, table, forms, interactive, template) +- 14 void elements (br, img, input, meta, link, etc.) — no closing tag +- 23 boolean attributes (disabled, checked, required, hidden, etc.) +- Text and attribute escaping (XSS prevention: &, <, >, ") +- `raw!` for trusted unescaped HTML +- `<>` fragment rendering (no wrapper element) +- Render-aware special forms: `if`, `when`, `cond`, `let`/`let*`, `begin`/`do`, `map`, `map-indexed`, `filter`, `for-each`, `define`, `defcomp` — these call `_render` on result branches so HTML tags inside control flow work correctly +- `_render_component()` — render-aware component calling (vs evaluator's `_call_component` which only evaluates) +- `_render_lambda_call()` — lambda bodies containing HTML tags are rendered directly +- `_RawHTML` marker type — pre-rendered children pass through without double-escaping +- Component children rendered to HTML string and wrapped as `_RawHTML` for safe embedding + +**Key architectural decision:** The renderer maintains a parallel set of special form handlers (`_RENDER_FORMS`) that mirror the evaluator's special forms but call `_render` on results instead of `_eval`. 
This is necessary because the evaluator doesn't know about HTML tags — `_eval((p "Hello"))` fails with "Undefined symbol: p". The renderer intercepts these forms before they reach the evaluator. + +**Dispatch order in `_render_list`:** +1. `raw!` → unescaped HTML +2. `<>` → fragment +3. `_RENDER_FORMS` (checked before HTML_TAGS because `map` is both a render form and an HTML tag) +4. `HTML_TAGS` → element rendering +5. `~prefix` → component rendering +6. Fallthrough → `_eval` then `_render` + +**Tests** (`shared/sexp/tests/test_html.py`): +- 63 tests: escaping (4), atoms (8), elements (6), attributes (8), boolean attrs (4), void elements (7), fragments (3), raw! (3), components (4), expressions with control flow (8), full pages (3), edge cases (5) +- **172 total tests across all 3 files, all passing** + ### Test Infrastructure — COMPLETE **Delivered:** diff --git a/shared/sexp/html.py b/shared/sexp/html.py new file mode 100644 index 0000000..40ea6f4 --- /dev/null +++ b/shared/sexp/html.py @@ -0,0 +1,473 @@ +""" +HSX-style HTML renderer. + +Walks an s-expression tree and emits an HTML string. HTML elements are +recognised by tag name; everything else is evaluated via the s-expression +evaluator and then rendered recursively. + +Usage:: + + from shared.sexp import parse, make_env + from shared.sexp.html import render + + expr = parse('(div :class "card" (h1 "Hello") (p "World"))') + html = render(expr) + # → '

<div class="card"><h1>Hello</h1><p>World</p></div>

' + +Components defined with ``defcomp`` are evaluated and their result is +rendered as HTML:: + + env = {} + evaluate(parse('(defcomp ~card (&key title &rest children) ...)'), env) + html = render(parse('(~card :title "Hi" (p "body"))'), env) +""" + +from __future__ import annotations + +from typing import Any + +from .types import Component, Keyword, Lambda, NIL, Symbol +from .evaluator import _eval, _call_component + + +class _RawHTML: + """Marker for pre-rendered HTML that should not be escaped.""" + __slots__ = ("html",) + + def __init__(self, html: str): + self.html = html + + +# --------------------------------------------------------------------------- +# HTML constants +# --------------------------------------------------------------------------- + +# Tags that must not have a closing tag +VOID_ELEMENTS = frozenset({ + "area", "base", "br", "col", "embed", "hr", "img", "input", + "link", "meta", "param", "source", "track", "wbr", +}) + +# Standard HTML tags (subset — any symbol that isn't recognised here will be +# treated as a function call and evaluated instead of rendered as a tag). 
HTML_TAGS = frozenset({
    # Root / document
    "html", "head", "body",
    # Metadata
    "title", "meta", "link", "style", "script", "base", "noscript",
    # Sections
    "header", "footer", "main", "nav", "aside", "section", "article",
    "address", "hgroup",
    # Headings
    "h1", "h2", "h3", "h4", "h5", "h6",
    # Grouping
    "div", "p", "blockquote", "pre", "figure", "figcaption",
    "ul", "ol", "li", "dl", "dt", "dd", "hr",
    # Text
    "a", "span", "em", "strong", "small", "s", "cite", "q",
    "abbr", "code", "var", "samp", "kbd", "sub", "sup",
    "i", "b", "u", "mark", "ruby", "rt", "rp",
    "bdi", "bdo", "br", "wbr", "time", "data",
    # Edits
    "ins", "del",
    # Embedded
    "img", "picture", "source", "iframe", "embed", "object", "param",
    "video", "audio", "track", "canvas", "map", "area",
    "svg", "math",
    # Table
    "table", "thead", "tbody", "tfoot", "tr", "th", "td",
    "caption", "colgroup", "col",
    # Forms
    "form", "fieldset", "legend", "label", "input", "button",
    "select", "option", "optgroup", "textarea", "output",
    "datalist", "progress", "meter",
    # Interactive
    "details", "summary", "dialog",
    # Template
    "template", "slot",
})

# Attributes that are boolean (presence = true, absence = false)
BOOLEAN_ATTRS = frozenset({
    "async", "autofocus", "autoplay", "checked", "controls",
    "default", "defer", "disabled", "formnovalidate", "hidden",
    "inert", "ismap", "loop", "multiple", "muted", "nomodule",
    "novalidate", "open", "playsinline", "readonly", "required",
    "reversed", "selected",
})


# ---------------------------------------------------------------------------
# Escaping
# ---------------------------------------------------------------------------

def escape_text(s: str) -> str:
    """Escape text content for safe HTML embedding.

    Replaces ``&``, ``<`` and ``>`` with their character references.
    ``&`` is handled first so the ampersands introduced by the later
    replacements are not escaped a second time.
    """
    return (
        s.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
    )


def escape_attr(s: str) -> str:
    """Escape an attribute value for safe embedding in double quotes.

    Same as :func:`escape_text` plus ``"`` → ``&quot;``. Single quotes are
    left alone because ``_render_element`` always emits double-quoted
    attribute values.
    """
    return (
        s.replace("&", "&amp;")
        .replace('"', "&quot;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
    )


# ---------------------------------------------------------------------------
# Renderer
# ---------------------------------------------------------------------------

def render(expr: Any, env: dict[str, Any] | None = None) -> str:
    """Render an s-expression as an HTML string.

    *expr* can be:
    - A parsed (unevaluated) s-expression from ``parse()``
    - An already-evaluated value (string, list, etc.)

    *env* provides variable bindings for evaluation. When omitted, a fresh
    empty dict is used.
    """
    if env is None:
        env = {}
    return _render(expr, env)


def _render(expr: Any, env: dict[str, Any]) -> str:
    """Render a single value or expression, dispatching on its Python type.

    The order of the checks matters: the boolean singletons are handled
    before the numeric branch because ``bool`` is a subclass of ``int``.
    """
    # --- nil / None / False → empty string --------------------------------
    if expr is None or expr is NIL or expr is False:
        return ""

    # --- True → empty (typically from a boolean expression, not content) ---
    if expr is True:
        return ""

    # --- pre-rendered HTML → pass through ----------------------------------
    if isinstance(expr, _RawHTML):
        return expr.html

    # --- string → escaped text --------------------------------------------
    if isinstance(expr, str):
        return escape_text(expr)

    # --- number → string --------------------------------------------------
    if isinstance(expr, (int, float)):
        return escape_text(str(expr))

    # --- symbol → evaluate then render ------------------------------------
    if isinstance(expr, Symbol):
        return _render(_eval(expr, env), env)

    # --- keyword → its name (unlikely in render context, but safe) --------
    if isinstance(expr, Keyword):
        return escape_text(expr.name)

    # --- list → main dispatch ---------------------------------------------
    if isinstance(expr, list):
        if not expr:
            return ""
        return _render_list(expr, env)

    # --- dict → skip (data, not renderable) -------------------------------
    if isinstance(expr, dict):
        return ""

    # --- fallback ---------------------------------------------------------
    return escape_text(str(expr))


# ---------------------------------------------------------------------------
# Render-aware special forms
# ---------------------------------------------------------------------------
# These mirror the evaluator's special forms but call _render on the result
# branches, so that HTML tags inside (if ...), (when ...), (let ...) etc.
# are rendered correctly instead of being evaluated as function calls.

def _truthy(value: Any) -> bool:
    """Return True when *value* counts as true in a conditional form.

    A value is true when it is truthy in Python and is not the ``NIL``
    sentinel. ``if``, ``when`` and ``cond`` all use this one rule.
    """
    return bool(value) and value is not NIL


def _is_else_marker(test: Any) -> bool:
    """Return True when a ``cond`` test is the catch-all ``else`` marker.

    Accepted spellings: the symbols ``else`` / ``:else`` and the keyword
    whose name is ``else``.
    """
    if isinstance(test, Symbol):
        return test.name in ("else", ":else")
    return isinstance(test, Keyword) and test.name == "else"


def _rsf_if(expr: list, env: dict[str, Any]) -> str:
    """Render ``(if test then [else])``; a missing else branch renders ""."""
    if _truthy(_eval(expr[1], env)):
        return _render(expr[2], env)
    if len(expr) > 3:
        return _render(expr[3], env)
    return ""


def _rsf_when(expr: list, env: dict[str, Any]) -> str:
    """Render ``(when test body...)``: all body forms, concatenated."""
    if _truthy(_eval(expr[1], env)):
        return "".join(_render(body_expr, env) for body_expr in expr[2:])
    return ""


def _rsf_cond(expr: list, env: dict[str, Any]) -> str:
    """Render the first matching branch of a ``cond`` form.

    Two layouts are recognised, chosen by the shape of the first clause:

    - Scheme-style ``(cond (test body) ...)`` when the first clause is a
      two-element list.
    - Clojure-style ``(cond test body test body ...)`` otherwise; a trailing
      unpaired test is ignored.

    Returns "" when no clause matches.
    """
    clauses = expr[1:]
    if not clauses:
        return ""

    if isinstance(clauses[0], list) and len(clauses[0]) == 2:
        # Scheme-style: ((test body) ...)
        for clause in clauses:
            test = clause[0]
            if _is_else_marker(test) or _truthy(_eval(test, env)):
                return _render(clause[1], env)
    else:
        # Clojure-style: test body test body ...
        i = 0
        while i < len(clauses) - 1:
            test = clauses[i]
            if _is_else_marker(test) or _truthy(_eval(test, env)):
                return _render(clauses[i + 1], env)
            i += 2
    return ""


def _rsf_let(expr: list, env: dict[str, Any]) -> str:
    """Render ``let`` / ``let*``: bind names in a copied env, render the body.

    Bindings may be written as ``((name value) ...)`` or as a flat
    ``(name value name value ...)`` list. A flat list of odd length is
    ignored entirely. Values are evaluated sequentially in the growing local
    scope, so later bindings can see earlier ones.
    """
    bindings = expr[1]
    local = dict(env)
    if isinstance(bindings, list):
        if bindings and isinstance(bindings[0], list):
            # Generators keep evaluation lazy, so a malformed later binding
            # fails only after the earlier ones have been bound.
            pairs = ((b[0], b[1]) for b in bindings)
        elif len(bindings) % 2 == 0:
            pairs = zip(bindings[0::2], bindings[1::2])
        else:
            pairs = ()
        for var, value_expr in pairs:
            vname = var.name if isinstance(var, Symbol) else var
            local[vname] = _eval(value_expr, local)
    return "".join(_render(body_expr, local) for body_expr in expr[2:])


def _rsf_begin(expr: list, env: dict[str, Any]) -> str:
    """Render ``begin`` / ``do``: every sub-form, concatenated in order."""
    return "".join(_render(sub, env) for sub in expr[1:])


def _rsf_define(expr: list, env: dict[str, Any]) -> str:
    """Run ``define`` through the evaluator; it produces no output."""
    _eval(expr, env)  # side effect: define in env
    return ""


def _rsf_defcomp(expr: list, env: dict[str, Any]) -> str:
    """Run ``defcomp`` through the evaluator; it produces no output."""
    _eval(expr, env)  # side effect: register component
    return ""


def _render_lambda_call(fn: Lambda, args: tuple, env: dict[str, Any]) -> str:
    """Call a lambda and render the result — the body may contain HTML tags.

    The scope is the lambda's closure, overlaid with the caller's *env*,
    overlaid with the parameter bindings. ``zip`` pairs parameters with
    arguments, so surplus parameters or arguments are silently dropped.
    """
    local = dict(fn.closure)
    local.update(env)
    for param, value in zip(fn.params, args):
        local[param] = value
    return _render(fn.body, local)


def _render_call(fn: Any, args: tuple, env: dict[str, Any]) -> str:
    """Apply *fn* to *args* and render the result.

    - ``Lambda``: the body is rendered directly, so HTML tags inside it work.
    - any other callable: called natively and its return value rendered.
    - anything else: the item itself (the last element of *args*) is
      rendered unchanged.
    """
    if isinstance(fn, Lambda):
        return _render_lambda_call(fn, args, env)
    if callable(fn):
        return _render(fn(*args), env)
    return _render(args[-1], env)


def _rsf_map(expr: list, env: dict[str, Any]) -> str:
    """Render ``(map fn coll)``: apply *fn* to each item, join the output."""
    fn = _eval(expr[1], env)
    coll = _eval(expr[2], env)
    return "".join(_render_call(fn, (item,), env) for item in coll)


def _rsf_map_indexed(expr: list, env: dict[str, Any]) -> str:
    """Render ``(map-indexed fn coll)``: *fn* receives ``(index, item)``."""
    fn = _eval(expr[1], env)
    coll = _eval(expr[2], env)
    return "".join(
        _render_call(fn, (i, item), env) for i, item in enumerate(coll)
    )


def _rsf_filter(expr: list, env: dict[str, Any]) -> str:
    """Render ``(filter ...)``: evaluate the whole form, render the result."""
    # filter returns a list — render each kept item
    return _render(_eval(expr, env), env)


def _rsf_for_each(expr: list, env: dict[str, Any]) -> str:
    """Render ``(for-each fn coll)``; when rendering this is the same as map."""
    return _rsf_map(expr, env)


_RENDER_FORMS: dict[str, Any] = {
    "if": _rsf_if,
    "when": _rsf_when,
    "cond": _rsf_cond,
    "let": _rsf_let,
    "let*": _rsf_let,
    "begin": _rsf_begin,
    "do": _rsf_begin,
    "define": _rsf_define,
    "defcomp": _rsf_defcomp,
    "map": _rsf_map,
    "map-indexed": _rsf_map_indexed,
    "filter": _rsf_filter,
    "for-each": _rsf_for_each,
}


def _split_args(
    args: list, env: dict[str, Any]
) -> tuple[dict[str, Any], list[Any]]:
    """Split call arguments into evaluated keyword values and raw children.

    A ``Keyword`` followed by another argument consumes that argument as its
    value, which is evaluated immediately. Everything else, including a
    trailing keyword with no value, is kept unevaluated as a child.
    """
    keyed: dict[str, Any] = {}
    children: list[Any] = []
    i = 0
    while i < len(args):
        arg = args[i]
        if isinstance(arg, Keyword) and i + 1 < len(args):
            keyed[arg.name] = _eval(args[i + 1], env)
            i += 2
        else:
            children.append(arg)
            i += 1
    return keyed, children


def _render_component(comp: Component, args: list, env: dict[str, Any]) -> str:
    """Render-aware component call: sets up scope then renders the body.

    Parameters not supplied by keyword are bound to ``NIL``. Children are
    rendered in the caller's *env*.
    """
    kwargs, children = _split_args(args, env)

    local = dict(comp.closure)
    local.update(env)
    for param in comp.params:
        local[param] = kwargs.get(param, NIL)
    if comp.has_children:
        # Render children to HTML and wrap as _RawHTML to prevent re-escaping
        local["children"] = _RawHTML(
            "".join(_render(child, env) for child in children)
        )
    return _render(comp.body, local)


def _render_list(expr: list, env: dict[str, Any]) -> str:
    """Render a list expression — could be an HTML element, special form,
    component call, or data list.

    Dispatch order for a symbol head: ``raw!``, ``<>``, render-aware special
    forms, HTML tags, ``~component``, then plain evaluation.
    """
    head = expr[0]

    if isinstance(head, Symbol):
        name = head.name

        # --- raw! → unescaped HTML ----------------------------------------
        if name == "raw!":
            parts = []
            for arg in expr[1:]:
                val = _eval(arg, env)
                if isinstance(val, str):
                    parts.append(val)
                elif val is not None and val is not NIL:
                    parts.append(str(val))
            return "".join(parts)

        # --- <> → fragment (render children, no wrapper) ------------------
        if name == "<>":
            return "".join(_render(child, env) for child in expr[1:])

        # --- Render-aware special forms -----------------------------------
        # Check BEFORE HTML_TAGS because some names overlap (e.g. `map`).
        if name in _RENDER_FORMS:
            return _RENDER_FORMS[name](expr, env)

        # --- HTML tag → render as element ---------------------------------
        if name in HTML_TAGS:
            return _render_element(name, expr[1:], env)

        # --- Component (~prefix) → render-aware component call ------------
        if name.startswith("~"):
            val = env.get(name)
            if isinstance(val, Component):
                return _render_component(val, expr[1:], env)
            # Fall through to evaluation

        # --- Other special forms / function calls → evaluate then render ---
        return _render(_eval(expr, env), env)

    # --- head is lambda or other callable → evaluate then render ----------
    if isinstance(head, (Lambda, list)):
        return _render(_eval(expr, env), env)

    # --- data list → render each item -------------------------------------
    return "".join(_render(item, env) for item in expr)


def _render_element(tag: str, args: list, env: dict[str, Any]) -> str:
    """Render an HTML element: extract attrs (keywords), render children.

    Attribute rules:
    - ``None`` / ``NIL`` / ``False`` values omit the attribute.
    - Names in ``BOOLEAN_ATTRS`` are emitted bare when the value is truthy.
    - Any other attribute whose value is ``True`` is also emitted bare.
    - Everything else is emitted as ``name="escaped value"``.

    Void elements return the opening tag only; any children are ignored.
    All other elements get a matching closing tag.
    """
    attrs, children = _split_args(args, env)

    # Build opening tag
    parts = [f"<{tag}"]
    for attr_name, attr_val in attrs.items():
        if attr_val is None or attr_val is NIL or attr_val is False:
            continue
        if attr_name in BOOLEAN_ATTRS:
            if attr_val:
                parts.append(f" {attr_name}")
        elif attr_val is True:
            parts.append(f" {attr_name}")
        else:
            parts.append(f' {attr_name}="{escape_attr(str(attr_val))}"')
    parts.append(">")
    opening = "".join(parts)

    # Void elements: no closing tag, no children
    if tag in VOID_ELEMENTS:
        return opening

    # Render children
    child_html = "".join(_render(child, env) for child in children)

    return f"{opening}{child_html}</{tag}>"


# ---------------------------------------------------------------------------
# NOTE: in the diff this section came from, a new file
# shared/sexp/tests/test_html.py follows. Only its first lines are visible
# (the view is cut off mid-statement), so they are preserved here as a
# comment rather than rewritten:
#
#   """Tests for the HSX-style HTML renderer."""
#
#   import pytest
#   from shared.sexp import parse, evaluate
#   from shared.sexp.html import render, escape_text, escape_attr
#
#   def r(text, env=None):
#       """Parse and render a single expression."""
#       return render(parse(text), env)
#
#   class TestEscaping:
#       def test_escape_text_ampersand(self):
#           assert escape_text("A & B") == "A &amp; B"
#
#       def test_escape_text_lt_gt(self):
#           ...  (truncated in this view)
# ---------------------------------------------------------------------------