Python + shell tooling used to split grouped index.sx files into one-directory-per-page layout (see the hyperscript gallery migration). name-mapping.json records the rename table; strip_names.py is a helper for extracting component names from .sx sources. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
447 lines · 14 KiB · Python
#!/usr/bin/env python3
|
|
"""Migrate sx_docs components to one-definition-per-file convention.
|
|
|
|
Reads all .sx files under sx/sx/ and sx/sxc/, splits multi-definition
|
|
files into one file per definition.
|
|
|
|
Usage:
|
|
python3 scripts/migrate_one_per_file.py --dry-run # preview
|
|
python3 scripts/migrate_one_per_file.py # execute
|
|
"""
|
|
import os
|
|
import sys
|
|
import json
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from shared.sx.parser import parse_all, serialize
|
|
from shared.sx.types import Symbol, Keyword
|
|
|
|
# Definition keywords whose second element is the definition's name symbol,
# e.g. (defcomp my-card ...).  Plain (define ...) is handled separately in
# get_def_info because it has two shapes (symbol vs. (name args...) head).
NAMED_DEFS = {"defcomp", "defisland", "defmacro", "defpage",
              "defhandler", "defstyle", "deftype", "defeffect",
              "defrelation", "deftest"}

# Files excluded from processing entirely (never read, never split).
SKIP_FILES = {"boundary.sx"}
|
|
|
|
|
|
def get_def_info(expr):
    """Return ``(keyword, name)`` for a definition form, or ``None``.

    Recognizes the NAMED_DEFS forms (e.g. ``(defcomp name ...)``, where the
    name symbol may carry a leading ``~`` that is stripped) and the two
    ``define`` shapes: ``(define name ...)`` and ``(define (name args...) ...)``.
    Anything else — non-lists, empty lists, forms whose head is not a
    Symbol — yields ``None``.
    """
    # A definition must be a non-empty list headed by a Symbol.
    if not (isinstance(expr, list) and expr and isinstance(expr[0], Symbol)):
        return None

    keyword = expr[0].name

    if keyword in NAMED_DEFS:
        if len(expr) >= 2 and isinstance(expr[1], Symbol):
            return (keyword, expr[1].name.lstrip("~"))
        return None

    if keyword == "define" and len(expr) >= 2:
        target = expr[1]
        if isinstance(target, Symbol):
            # (define name ...)
            return ("define", target.name)
        if isinstance(target, list) and target and isinstance(target[0], Symbol):
            # (define (name args...) ...)
            return ("define", target[0].name)

    return None
|
|
|
|
|
|
def derive_local_name(def_name, file_rel_path):
    """Derive the short filename for a definition within the file's directory.

    Strategy:
    1. If the name contains '/', split on the LAST '/' into namespace + local,
       then strip a redundant namespace-as-prefix from the local part.
    2. Otherwise, try stripping the file's full path (as hyphens) as a prefix,
       then just the file stem as a prefix.
    3. Fall back to the full name unchanged.

    Examples:
        name: examples/card                      file: examples.sx          → card
        name: layouts/doc                        file: layouts.sx           → doc
        name: reactive-islands-demo/example-counter
                                                 file: reactive-islands/demo.sx → example-counter
        name: geography-cek/geography-cek-cek-content
                                                 file: geography/cek.sx     → cek-content
        name: docs-nav-items                     file: nav-data.sx          → docs-nav-items
    """

    def drop_prefix(text, prefix):
        # Return text minus prefix when that leaves something, else None.
        if text.startswith(prefix):
            remainder = text[len(prefix):]
            if remainder:
                return remainder
        return None

    if '/' in def_name:
        namespace, local = def_name.rsplit('/', 1)
        # e.g. geography-cek/geography-cek-cek-content → cek-content
        return drop_prefix(local, namespace + '-') or local

    # No '/' in the name: try the full path (hyphenated), then the bare stem.
    stem = os.path.splitext(file_rel_path)[0]
    candidates = (stem.replace('/', '-').replace('\\', '-') + '-',
                  Path(file_rel_path).stem + '-')
    for prefix in candidates:
        shortened = drop_prefix(def_name, prefix)
        if shortened:
            return shortened

    return def_name
|
|
|
|
|
|
def extract_form_sources(source, exprs):
    """Extract the original source text for each top-level form.

    Walks the source text tracking paren depth (ignoring parens inside
    string literals, with backslash escapes) to find form boundaries.
    Any ``;`` comment lines immediately preceding a form are included
    with that form's text.

    Args:
        source: Full file text.
        exprs:  Parsed top-level expressions; only its length is used, as
                an upper bound on how many forms to extract.

    Returns:
        A list of source strings, one per top-level form, in file order.
        May contain fewer entries than ``exprs`` if the scan exhausts the
        text early.
    """
    results = []
    pos = 0
    n = len(source)

    for _ in range(len(exprs)):
        # Skip whitespace, collecting any leading comment lines so they
        # travel with the form they precede.
        comment_lines = []
        while pos < n:
            while pos < n and source[pos] in ' \t\r\n':
                pos += 1
            if pos >= n:
                break

            if source[pos] == ';':
                line_start = pos
                while pos < n and source[pos] != '\n':
                    pos += 1
                if pos < n:
                    pos += 1  # consume the newline
                comment_lines.append(source[line_start:pos].rstrip())
                continue

            break  # found the start of a form

        if pos >= n:
            break

        if source[pos] == '(':
            # Parenthesized form: scan to the matching close paren.
            depth = 0
            in_string = False
            escape = False
            form_body_start = pos

            while pos < n:
                c = source[pos]
                if escape:
                    escape = False
                elif c == '\\' and in_string:
                    escape = True
                elif c == '"':
                    in_string = not in_string
                elif not in_string:
                    if c == '(':
                        depth += 1
                    elif c == ')':
                        depth -= 1
                        if depth == 0:
                            pos += 1
                            break
                pos += 1

            form_text = source[form_body_start:pos]
        else:
            # Bare atom (symbol, number, ...): runs to the next whitespace.
            atom_start = pos
            while pos < n and source[pos] not in ' \t\r\n':
                pos += 1
            form_text = source[atom_start:pos]

        # BUGFIX: previously only the paren branch kept its preceding
        # comments; comments before a bare atom were silently dropped.
        # Both branches now attach them uniformly.
        if comment_lines:
            form_text = '\n'.join(comment_lines) + '\n' + form_text

        results.append(form_text)

    return results
|
|
|
|
|
|
def process_directory(base_dir, dry_run=True):
    """Scan all .sx files under ``base_dir`` and compute a split plan.

    Args:
        base_dir: Directory to walk for ``.sx`` files.
        dry_run:  Accepted for interface compatibility; this function only
                  computes the plan and never writes, so the flag is unused.

    Returns:
        A tuple ``(splits, single, no_defs, errors)``:
          splits  -- (source_rel_path, target_rel_path, content, kw, old_name)
                     for each new file to create
          single  -- (source_rel_path, kw, name) for files already holding
                     exactly one definition (left as-is)
          no_defs -- rel paths of files with no definitions (skipped)
          errors  -- (rel_path, error_message) for read/parse failures
    """
    splits = []
    single = []
    no_defs = []
    errors = []

    for root, dirs, files in os.walk(base_dir):
        # Prune cache directories in-place so os.walk never descends into them.
        dirs[:] = [d for d in dirs if d not in ('__pycache__', '.cache', '.pytest_cache')]

        for filename in sorted(files):
            if not filename.endswith('.sx') or filename in SKIP_FILES:
                continue

            filepath = os.path.join(root, filename)
            rel_path = os.path.relpath(filepath, base_dir)

            try:
                with open(filepath, encoding='utf-8') as f:
                    source = f.read()
            except Exception as e:
                errors.append((rel_path, str(e)))
                continue

            try:
                exprs = parse_all(source)
            except Exception as e:
                errors.append((rel_path, f"Parse: {e}"))
                continue

            # Classify each top-level form exactly once and reuse below
            # (previously get_def_info ran twice per form).
            all_exprs = [(expr, get_def_info(expr)) for expr in exprs]
            defs = [(expr, info) for expr, info in all_exprs if info]
            non_defs = [expr for expr, info in all_exprs if info is None]

            if not defs:
                no_defs.append(rel_path)
                continue

            if len(defs) == 1 and not non_defs:
                # Single definition, nothing else — file already conforms.
                _, (kw, name) = defs[0]
                single.append((rel_path, kw, name))
                continue

            # Multiple definitions — split into <dir>/<file_stem>/<local>.sx.
            file_stem = Path(filename).stem
            target_dir = os.path.join(os.path.dirname(rel_path), file_stem)

            # Prefer original source text per form over re-serializing, so
            # comments and formatting survive the split.
            form_sources = extract_form_sources(source, exprs)

            # Deduplicate: keep only the LAST definition of each name.
            last_seen = {}
            for idx, (expr, info) in enumerate(all_exprs):
                if info:
                    last_seen[info[1]] = idx
            keep = set(last_seen.values())

            for idx, (expr, info) in enumerate(all_exprs):
                if info is None or idx not in keep:
                    # Non-def form, or an earlier (shadowed) duplicate.
                    continue

                kw, name = info
                local = derive_local_name(name, rel_path)
                target_file = os.path.join(target_dir, local.replace('/', '-') + '.sx')

                if idx < len(form_sources):
                    content = form_sources[idx]
                else:
                    content = serialize(expr, pretty=True)

                splits.append((rel_path, target_file, content, kw, name))

            # Non-definition forms are gathered into a single _init.sx.
            if non_defs:
                init_parts = []
                for idx, (expr, info) in enumerate(all_exprs):
                    if info is None:
                        if idx < len(form_sources):
                            init_parts.append(form_sources[idx])
                        else:
                            init_parts.append(serialize(expr, pretty=True))

                if init_parts:
                    init_content = '\n\n'.join(init_parts)
                    init_file = os.path.join(target_dir, '_init.sx')
                    splits.append((rel_path, init_file, init_content, 'init', '_init'))

    return splits, single, no_defs, errors
|
|
|
|
|
|
def main():
    """CLI entry point: compute the split plan, report it, optionally execute.

    With ``--dry-run``, prints the full plan and exits.  Otherwise writes
    the one-definition-per-file layout, deletes the original
    multi-definition source files, and records the old-name → new-name
    table in scripts/name-mapping.json.
    """
    parser = argparse.ArgumentParser(description="Migrate SX to one-per-file")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--dir", default=None)
    args = parser.parse_args()

    # Run from the project root so the relative paths below resolve.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    os.chdir(project_root)

    dirs = [args.dir] if args.dir else ["sx/sx", "sx/sxc"]

    all_splits = []
    all_single = []
    all_no_defs = []
    all_errors = []

    for d in dirs:
        if not os.path.isdir(d):
            print(f"Skip {d}")
            continue

        print(f"\n{'='*60}")
        print(f" {d}")
        print(f"{'='*60}")

        splits, single, no_defs, errors = process_directory(d, args.dry_run)

        # Re-prefix plan entries with the base dir to get repo-relative paths.
        for s in splits:
            src, tgt, content, kw, name = s
            all_splits.append((d, os.path.join(d, src), os.path.join(d, tgt),
                               content, kw, name))
        for s in single:
            path, kw, name = s
            all_single.append((d, os.path.join(d, path), kw, name))
        all_no_defs.extend(os.path.join(d, p) for p in no_defs)
        all_errors.extend((os.path.join(d, p), e) for p, e in errors)

    # Detect two definitions mapping to the same target file.
    target_map = {}
    conflicts = []
    for _, src, tgt, content, kw, name in all_splits:
        if tgt in target_map:
            conflicts.append((tgt, target_map[tgt], (kw, name, src)))
        else:
            target_map[tgt] = (kw, name, src)

    # Group planned outputs by source file (used for reporting and deletion).
    by_source = {}
    for _, src, tgt, content, kw, name in all_splits:
        by_source.setdefault(src, []).append((tgt, kw, name))

    # --- Report ---
    if all_errors:
        print(f"\n--- Errors ({len(all_errors)}) ---")
        for p, e in all_errors:
            print(f" {p}: {e}")

    if conflicts:
        print(f"\n--- {len(conflicts)} Conflicts ---")
        for tgt, existing, new in conflicts:
            print(f" {tgt}")
            print(f" existing: {existing[1]} from {existing[2]}")
            print(f" new: {new[1]} from {new[2]}")

    total_new = len(all_splits)
    print(f"\n{'='*60}")
    print(f" Summary")
    print(f"{'='*60}")
    print(f" Files to split: {len(by_source)}")
    print(f" New files: {total_new}")
    print(f" Single-def (keep): {len(all_single)}")
    print(f" No-defs (skip): {len(all_no_defs)}")
    print(f" Conflicts: {len(conflicts)}")

    if args.dry_run:
        print(f"\n--- Split plan ---")
        for src in sorted(by_source.keys()):
            targets = by_source[src]
            print(f"\n {src} → {len(targets)} files:")
            for tgt, kw, name in sorted(targets):
                print(f" {tgt} ({kw})")

        print(f"\n--- Single-def files ---")
        for _, path, kw, name in sorted(all_single)[:15]:
            print(f" {path} ({kw} {name})")
        if len(all_single) > 15:
            print(f" ... and {len(all_single) - 15} more")

        # Show one sample output file so the user can eyeball formatting.
        if all_splits:
            _, src, tgt, content, kw, name = all_splits[0]
            print(f"\n--- Sample: {tgt} ---")
            lines = content.split('\n')
            for line in lines[:15]:
                print(f" {line}")
            if len(lines) > 15:
                print(f" ... ({len(lines)} lines total)")

        print(f"\nDry run. Run without --dry-run to execute.")
        return

    # --- Execute ---
    if conflicts:
        print("Aborting due to conflicts.")
        sys.exit(1)

    created = 0
    for _, src, tgt, content, kw, name in all_splits:
        os.makedirs(os.path.dirname(tgt), exist_ok=True)
        if os.path.exists(tgt):
            print(f" SKIP (exists): {tgt}")
            continue
        with open(tgt, 'w', encoding='utf-8') as f:
            f.write(content.rstrip() + '\n')
        created += 1

    # Delete the now-split source files.
    deleted = 0
    for src in by_source:
        if os.path.exists(src):
            os.remove(src)
            deleted += 1

    print(f"\n Created {created}, deleted {deleted} source files.")

    # Build the name mapping: old_name -> new path-derived name.
    mapping = {}
    for _, src, tgt, _, kw, old_name in all_splits:
        if kw in ('init', 'preamble'):
            continue
        # Determine which base_dir this target lives under.
        for d in dirs:
            # BUGFIX: targets are built with os.path.join, so on Windows the
            # separator is '\\' and the old `d + '/'` check never matched,
            # leaving the mapping silently empty.  Compare with os.sep too.
            if tgt.startswith(d + os.sep) or tgt.startswith(d + '/'):
                # Normalize to '/' so mapped names match the '/'-style
                # definition names used in the .sx sources.
                new_name = os.path.splitext(os.path.relpath(tgt, d))[0]
                new_name = new_name.replace(os.sep, '/')
                if old_name != new_name:
                    mapping[old_name] = new_name
                break

    mapping_file = "scripts/name-mapping.json"
    with open(mapping_file, 'w', encoding='utf-8') as f:
        json.dump(mapping, f, indent=2, sort_keys=True)
    print(f" Name mapping: {mapping_file} ({len(mapping)} entries)")


if __name__ == "__main__":
    main()
|