Cache parsed components for 10x faster startup (2s → 200ms)

- Fix O(n²) postprocessing: compute_all_deps/io_refs/hash were called
  per-file (92x for sx app). Now deferred to single finalize_components()
  call after all files load.
- Add pickle cache in shared/sx/.cache/ keyed by file mtimes+sizes.
  Cache stores fully-processed Component/Island/Macro objects with deps,
  io_refs, and css_classes pre-computed. Closures stripped before pickle,
  rebuilt from global env after restore.
- Smart finalization: cached loads skip deps/io_refs recomputation
  (already in pickle), only recompute component hash.
- Fix ~sx-header → ~layouts/header ref in docs-content.sx

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-12 23:54:38 +00:00
parent bc1ea0128f
commit 100450772f
5 changed files with 224 additions and 42 deletions

View File

@@ -118,6 +118,11 @@ def create_base_app(
setup_jinja(app)
setup_sx_bridge(app)
load_shared_components()
# Finalize shared components (deps, hash) now — service apps that call
# load_service_components() will re-finalize after loading their own.
from shared.sx.jinja_bridge import _rebuild_closures, _finalize_if_needed
_rebuild_closures()
_finalize_if_needed()
load_relation_registry()
# Load defquery/defaction definitions from {service}/queries.sx and actions.sx

View File

@@ -13,14 +13,18 @@ from .jinja_bridge import load_sx_dir, register_reload_callback, watch_sx_dir
def load_shared_components() -> None:
    """Register all shared s-expression components.

    Loads the SX libraries, then every ``.sx`` file under this package's
    ``templates`` directory, and starts watching that directory for changes.

    Defers finalization (deps/hash) so the calling app can load service
    components before the single finalize pass.
    """
    # Load SX libraries first — reader macros (#z3 etc.) must resolve
    # before any .sx file that uses them is parsed
    _load_sx_libraries()
    # Re-run the library load on every hot reload as well.
    register_reload_callback(_load_sx_libraries)

    templates_dir = os.path.join(os.path.dirname(__file__), "templates")
    # Exactly one load of the templates directory, with finalization deferred
    # to the caller.
    load_sx_dir(templates_dir, _finalize=False)
    watch_sx_dir(templates_dir)
@@ -32,4 +36,4 @@ def _load_sx_libraries() -> None:
path = os.path.join(ref_dir, name)
if os.path.exists(path):
with open(path, encoding="utf-8") as f:
register_components(f.read())
register_components(f.read(), _defer_postprocess=True)

View File

@@ -22,10 +22,13 @@ from __future__ import annotations
import glob
import hashlib
import logging
import os
import pickle
import time
from typing import Any
from .types import NIL, Component, Island, Keyword, Macro, Symbol
from .types import NIL, Component, Island, Keyword, Lambda, Macro, Symbol
from .parser import parse
import os as _os
if _os.environ.get("SX_USE_REF") == "1":
@@ -33,6 +36,8 @@ if _os.environ.get("SX_USE_REF") == "1":
else:
from .html import render as html_render, _render_component
_logger = logging.getLogger("sx.bridge")
# ---------------------------------------------------------------------------
# Shared component environment
@@ -97,30 +102,193 @@ def _compute_component_hash() -> None:
_COMPONENT_HASH = ""
def load_sx_dir(directory: str) -> None:
# Directory that holds the pickle caches of parsed components; it lives in a
# ".cache" folder next to this module.
_CACHE_DIR = os.path.join(os.path.dirname(__file__), ".cache")
def _cache_key_for_dir(directory: str, files: list[str]) -> str:
    """Fingerprint a list of source files for cache lookup.

    Each file contributes one ``path:mtime_ns:size`` line; the lines are
    joined in the order given (callers pass an already-sorted list) and
    hashed with SHA-256. The first 16 hex digits of the digest are returned.

    *directory* is accepted for symmetry with the other cache helpers but
    does not influence the key.

    Raises:
        OSError: if any of the files cannot be stat'ed.
    """
    def _signature(path: str) -> str:
        info = os.stat(path)
        return f"{path}:{info.st_mtime_ns}:{info.st_size}"

    manifest = "\n".join(map(_signature, files))
    return hashlib.sha256(manifest.encode()).hexdigest()[:16]
def _cache_path(directory: str, key: str) -> str:
    """Build the pickle file path for *directory* under the cache folder.

    The file name has the form ``sx_<dir-digest>_<key>.pkl`` where
    ``<dir-digest>`` is the first 12 hex digits of the SHA-256 of the
    directory string and ``<key>`` is the content key supplied by the caller.
    """
    dir_digest = hashlib.sha256(directory.encode()).hexdigest()
    filename = f"sx_{dir_digest[:12]}_{key}.pkl"
    return os.path.join(_CACHE_DIR, filename)
def _try_load_cache(directory: str, files: list[str]) -> bool:
    """Try to restore components from a pickle cache.

    The cache file is located from the directory and the current
    mtimes/sizes of *files*. On success the cached env entries are merged
    into ``_COMPONENT_ENV`` and the cached client sources are appended to
    ``_CLIENT_LIBRARY_SOURCES``.

    Shared state is only touched once the whole payload has been read and
    validated, so a truncated or malformed cache file can never leave the
    environment half-populated.

    Returns:
        True if the cache was valid and components were restored; False if
        no cache file exists or it could not be used (an unusable file is
        deleted so the next save can replace it).
    """
    key = _cache_key_for_dir(directory, files)
    path = _cache_path(directory, key)
    try:
        # NOTE(security): pickle.load executes arbitrary code from the file.
        # This is only acceptable because the cache directory is written by
        # this module itself — it must never be shared with, or writable by,
        # an untrusted party.
        with open(path, "rb") as f:
            cached = pickle.load(f)
        env_entries = cached["env"]
        client_sources = cached["client_sources"]
        if not isinstance(env_entries, dict) or not isinstance(client_sources, list):
            raise TypeError("unexpected cache payload shape")
    except FileNotFoundError:
        # Plain cache miss — nothing to report or clean up.
        return False
    except Exception as e:
        _logger.warning("Cache load failed for %s: %s", directory, e)
        try:
            os.remove(path)
        except OSError:
            pass
        return False

    _COMPONENT_ENV.update(env_entries)
    _CLIENT_LIBRARY_SOURCES.extend(client_sources)
    _logger.info("Cache hit: %s (%d entries)", directory, len(env_entries))
    return True
def _save_cache(
    directory: str,
    files: list[str],
    env_entries: dict[str, Any],
    client_sources: list[str],
) -> None:
    """Save component env entries to a pickle cache.

    Best-effort: any failure while writing or cleaning up is logged as a
    warning and otherwise ignored, so caching can never break a load.

    The pickle is written to a per-process temporary file and then moved
    into place with ``os.replace``, so readers never observe a partially
    written cache and a failed dump leaves no corrupt file at the final path.

    Args:
        directory: The component directory the entries were loaded from.
        files: The source files whose mtimes/sizes form the cache key.
        env_entries: Env entries contributed by *directory*.
        client_sources: Normalized client-side sources from *directory*.
    """
    key = _cache_key_for_dir(directory, files)
    path = _cache_path(directory, key)
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        os.makedirs(_CACHE_DIR, exist_ok=True)
        # Strip closures before pickling — they reference the global env
        # and would bloat/fail the pickle. Closures are rebuilt after load.
        stripped = _strip_closures(env_entries)
        payload = {"env": stripped, "client_sources": client_sources}
        try:
            with open(tmp_path, "wb") as f:
                pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)
            os.replace(tmp_path, path)
        except Exception:
            # Do not leave a half-written temp file behind.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            raise

        # Clean stale caches for this directory. Only finished ".pkl" files
        # are considered, so another process's in-flight temp file is left
        # alone.
        dir_hash = hashlib.sha256(directory.encode()).hexdigest()[:12]
        prefix = f"sx_{dir_hash}_"
        current = os.path.basename(path)
        for old in os.listdir(_CACHE_DIR):
            if old == current or not old.startswith(prefix) or not old.endswith(".pkl"):
                continue
            try:
                os.remove(os.path.join(_CACHE_DIR, old))
            except OSError:
                pass
    except Exception as e:
        _logger.warning("Cache save failed for %s: %s", directory, e)
def _strip_closures(env_entries: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of env entries with closures emptied for pickling.

    Component, Island, Macro and Lambda values are rebuilt as fresh objects
    whose ``closure`` is an empty dict; their list/set/dict attributes are
    shallow-copied and ``body`` is shared with the original. Island copies
    carry name, params, has_children, body, css_classes, deps and io_refs
    only. Every other value is passed through unchanged.

    The input mapping and its values are not modified.
    """
    stripped: dict[str, Any] = {}
    for name, entry in env_entries.items():
        if isinstance(entry, Component):
            copy = Component(
                name=entry.name,
                params=list(entry.params),
                has_children=entry.has_children,
                body=entry.body,
                closure={},
                css_classes=set(entry.css_classes),
                deps=set(entry.deps),
                io_refs=set(entry.io_refs) if entry.io_refs else None,
                affinity=entry.affinity,
                param_types=dict(entry.param_types) if entry.param_types else None,
            )
        elif isinstance(entry, Island):
            copy = Island(
                name=entry.name,
                params=list(entry.params),
                has_children=entry.has_children,
                body=entry.body,
                closure={},
                css_classes=set(entry.css_classes),
                deps=set(entry.deps),
                io_refs=set(entry.io_refs) if entry.io_refs else None,
            )
        elif isinstance(entry, Macro):
            copy = Macro(
                params=list(entry.params),
                rest_param=entry.rest_param,
                body=entry.body,
                closure={},
                name=entry.name,
            )
        elif isinstance(entry, Lambda):
            copy = Lambda(
                params=list(entry.params),
                body=entry.body,
                closure={},
                name=entry.name,
            )
        else:
            # Basic values (dicts, lists, strings, numbers) — pickle directly
            copy = entry
        stripped[name] = copy
    return stripped
def _rebuild_closures() -> None:
    """Re-attach the global env as the closure of every callable entry.

    Entries restored from the pickle cache come back with empty closures.
    This walks ``_COMPONENT_ENV`` and sets ``closure`` on each Component,
    Island, Lambda and Macro to ``_COMPONENT_ENV`` itself.

    Per the original author's note, this is safe because the evaluator
    merges closure + caller env at call time and the caller env is always
    ``_COMPONENT_ENV``.
    """
    closure_types = (Component, Island, Lambda, Macro)
    for entry in _COMPONENT_ENV.values():
        if not isinstance(entry, closure_types):
            continue
        entry.closure = _COMPONENT_ENV
# Directories whose components were restored from the pickle cache.
# load_sx_dir() adds a directory here on a cache hit; reload_if_changed()
# clears the set before reloading everything.
_dirs_from_cache: set[str] = set()
def load_sx_dir(directory: str, *, _finalize: bool = True) -> None:
    """Load all .sx files from a directory and register components.

    Skips boundary.sx — those are parsed separately by the boundary validator.
    Files starting with ``;; @client`` have their source stored for delivery
    to the browser (so ``define`` forms are available client-side).

    Uses a pickle cache keyed by file mtimes — if no files changed,
    components are restored from cache without parsing or evaluation.

    Args:
        directory: Root directory; searched recursively for ``*.sx`` files.
        _finalize: On a cache hit, whether to rebuild closures and refresh
            the component hash here (pass False when the caller finalizes
            once after loading several directories). On a cache miss the
            components are always finalized before the cache is written,
            because the cached entries must already carry deps/io_refs —
            cache hits never recompute them.
    """
    t0 = time.monotonic()

    files = sorted(
        fp for fp in glob.glob(os.path.join(directory, "**", "*.sx"), recursive=True)
        if os.path.basename(fp) != "boundary.sx"
    )
    if not files:
        return

    # Try cache first
    if _try_load_cache(directory, files):
        _dirs_from_cache.add(directory)
        if _finalize:
            _rebuild_closures()
            _finalize_if_needed()
        t1 = time.monotonic()
        _logger.info("Loaded %s from cache in %.1fms", directory, (t1 - t0) * 1000)
        return

    # Cache miss — full parse + eval
    env_before = set(_COMPONENT_ENV.keys())
    new_client_sources: list[str] = []

    for filepath in files:
        with open(filepath, encoding="utf-8") as f:
            source = f.read()
        if source.lstrip().startswith(";; @client"):
            # Parse and re-serialize to normalize syntax sugar.
            # The Python parser accepts ' for quote but the bootstrapped
            # client parser uses #' — re-serializing emits (quote x).
            from .parser import parse_all, serialize
            exprs = parse_all(source)
            normalized = "\n".join(serialize(e) for e in exprs)
            new_client_sources.append(normalized)
            _CLIENT_LIBRARY_SOURCES.append(normalized)
        register_components(source, _defer_postprocess=True)

    # Finalize even when the caller asked to defer: the cache must be saved
    # AFTER finalization so deps/io_refs are included. Saving un-finalized
    # entries would make every later cache hit serve components without
    # deps/io_refs. This is one pass per directory, and only on a miss.
    finalize_components()

    new_entries = {k: v for k, v in _COMPONENT_ENV.items() if k not in env_before}
    _save_cache(directory, files, new_entries, new_client_sources)

    t1 = time.monotonic()
    _logger.info("Loaded %s (%d files, %d new) in %.1fms",
                 directory, len(files), len(new_entries), (t1 - t0) * 1000)
def _finalize_if_needed() -> None:
    """Skip heavy deps/io_refs recomputation if all directories were cached.

    Cached components already have deps and io_refs populated, so when
    every watched directory was restored from the pickle cache only the
    hash needs recomputing (it depends on all components).

    If any watched directory was parsed from source — or nothing is being
    watched yet, so the state cannot be verified — fall back to the full
    ``finalize_components()`` pass. That keeps components that were loaded
    with deferred post-processing from ending up without deps/io_refs.
    """
    watched = set(_watched_dirs)
    if watched and watched <= _dirs_from_cache:
        _compute_component_hash()
    else:
        finalize_components()
# ---------------------------------------------------------------------------
@@ -149,9 +317,7 @@ def watch_sx_dir(directory: str) -> None:
def reload_if_changed() -> None:
"""Re-read sx files if any have changed on disk. Called per-request in dev."""
import logging
import time
_logger = logging.getLogger("sx.reload")
reload_logger = logging.getLogger("sx.reload")
changed_files = []
for directory in _watched_dirs:
@@ -164,17 +330,22 @@ def reload_if_changed() -> None:
changed_files.append(fp)
if changed_files:
for fp in changed_files:
_logger.info("Changed: %s", fp)
reload_logger.info("Changed: %s", fp)
t0 = time.monotonic()
_COMPONENT_ENV.clear()
_CLIENT_LIBRARY_SOURCES.clear()
_dirs_from_cache.clear()
# Reload SX libraries first (e.g. z3.sx) so reader macros resolve
for cb in _reload_callbacks:
cb()
# Load all directories with deferred finalization
for directory in _watched_dirs:
load_sx_dir(directory)
load_sx_dir(directory, _finalize=False)
# Finalize once after all directories are loaded
_rebuild_closures()
finalize_components()
t1 = time.monotonic()
_logger.info("Reloaded %d file(s), components in %.1fms",
reload_logger.info("Reloaded %d file(s), components in %.1fms",
len(changed_files), (t1 - t0) * 1000)
# Recompute render plans for all services that have pages
@@ -182,7 +353,7 @@ def reload_if_changed() -> None:
for svc in _PAGE_REGISTRY:
t2 = time.monotonic()
compute_page_render_plans(svc)
_logger.info("Render plans for %s in %.1fms", svc, (time.monotonic() - t2) * 1000)
reload_logger.info("Render plans for %s in %.1fms", svc, (time.monotonic() - t2) * 1000)
def load_service_components(service_dir: str, service_name: str | None = None) -> None:
@@ -190,12 +361,17 @@ def load_service_components(service_dir: str, service_name: str | None = None) -
Components from ``{service_dir}/sx/`` and handlers from
``{service_dir}/sx/handlers/`` or ``{service_dir}/sx/handlers.sx``.
This is called after ``load_shared_components()`` which defers
finalization, so we finalize here (once for shared + service).
"""
sx_dir = os.path.join(service_dir, "sx")
if os.path.isdir(sx_dir):
load_sx_dir(sx_dir)
load_sx_dir(sx_dir) # finalize=True by default
watch_sx_dir(sx_dir)
_rebuild_closures()
# Load handler definitions if service_name is provided
if service_name:
load_handler_dir(os.path.join(sx_dir, "handlers"), service_name)
@@ -213,21 +389,12 @@ def load_handler_dir(directory: str, service_name: str) -> None:
_load(directory, service_name)
def register_components(sx_source: str) -> None:
def register_components(sx_source: str, *, _defer_postprocess: bool = False) -> None:
"""Parse and evaluate s-expression component definitions into the
shared environment.
Typically called at app startup::
register_components('''
(defcomp ~shared:fragments/link-card (&key link title image icon)
(a :href link :class "block rounded ..."
(div :class "flex ..."
(if image
(img :src image :class "...")
(div :class "..." (i :class icon)))
(div :class "..." (div :class "..." title)))))
''')
When *_defer_postprocess* is True, skip deps/io_refs/hash computation.
Call ``finalize_components()`` once after all files are loaded.
"""
from .ref.sx_ref import eval_expr as _raw_eval, trampoline as _trampoline
_eval = lambda expr, env: _trampoline(_raw_eval(expr, env))
@@ -242,8 +409,6 @@ def register_components(sx_source: str) -> None:
_eval(expr, _COMPONENT_ENV)
# Pre-scan CSS classes for newly registered components.
# Scan the full source once — components from the same file share the set.
# Slightly over-counts per component but safe and avoids re-scanning at request time.
all_classes: set[str] | None = None
for key, val in _COMPONENT_ENV.items():
if key not in existing and isinstance(val, (Component, Island)):
@@ -251,11 +416,18 @@ def register_components(sx_source: str) -> None:
all_classes = scan_classes_from_sx(sx_source)
val.css_classes = set(all_classes)
# Recompute transitive deps for all components (cheap — just AST walking)
if not _defer_postprocess:
finalize_components()
def finalize_components() -> None:
    """Run the post-processing pass over every registered component.

    Computes deps and IO refs across the whole shared environment and then
    refreshes the component hash. Intended to be called once after all
    component files have been loaded, not once per file.
    """
    from .deps import compute_all_deps, compute_all_io_refs, get_all_io_names

    env = _COMPONENT_ENV
    compute_all_deps(env)
    io_names = get_all_io_names()
    compute_all_io_refs(env, io_names)
    _compute_component_hash()