Files
rose-ash/scripts/migrate_one_per_file.py
giles 6528ce78b9 Scripts: page migration helpers for one-per-file layout
Python + shell tooling used to split grouped index.sx files into
one-directory-per-page layout (see the hyperscript gallery migration).
name-mapping.json records the rename table; strip_names.py is a helper
for extracting component names from .sx sources.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 09:09:15 +00:00

447 lines
14 KiB
Python

#!/usr/bin/env python3
"""Migrate sx_docs components to one-definition-per-file convention.
Reads all .sx files under sx/sx/ and sx/sxc/, splits multi-definition
files into one file per definition.
Usage:
python3 scripts/migrate_one_per_file.py --dry-run # preview
python3 scripts/migrate_one_per_file.py # execute
"""
import os
import sys
import json
import argparse
from pathlib import Path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from shared.sx.parser import parse_all, serialize
from shared.sx.types import Symbol, Keyword
# Definition heads whose second element is the definition's name (a Symbol).
# get_def_info() strips a leading "~" from these names.
NAMED_DEFS = {"defcomp", "defisland", "defmacro", "defpage",
              "defhandler", "defstyle", "deftype", "defeffect",
              "defrelation", "deftest"}
# Files that are never split, even when they hold multiple definitions.
SKIP_FILES = {"boundary.sx"}
def get_def_info(expr):
    """Classify a top-level form.

    Returns a ``(keyword, name)`` pair when *expr* is a recognized
    definition form, otherwise ``None``.
    """
    if not isinstance(expr, list) or not expr:
        return None
    head = expr[0]
    if not isinstance(head, Symbol):
        return None
    kw = head.name
    if kw in NAMED_DEFS:
        # (defcomp name ...) — the name slot must hold a Symbol.
        if len(expr) >= 2 and isinstance(expr[1], Symbol):
            return (kw, expr[1].name.lstrip("~"))
        return None
    if kw == "define":
        if len(expr) < 2:
            return None
        target = expr[1]
        if isinstance(target, Symbol):
            # (define name value)
            return ("define", target.name)
        if isinstance(target, list) and target and isinstance(target[0], Symbol):
            # (define (name args...) body) — function-style define.
            return ("define", target[0].name)
        return None
    return None
def derive_local_name(def_name, file_rel_path):
    """Derive the short filename for a definition within its file's directory.

    Strategy:
      1. If the name contains '/', split on the LAST '/' into namespace +
         local, then drop a redundant "<namespace>-" prefix from the local
         part.
      2. Otherwise, try stripping the file's full path (with separators
         turned into hyphens) as a prefix, then just the file stem.
      3. Fall back to the full name.

    Examples:
        examples/card                          + examples.sx        -> card
        layouts/doc                            + layouts.sx         -> doc
        reactive-islands-demo/example-counter  + reactive-islands/demo.sx
                                                                    -> example-counter
        geography-cek/geography-cek-cek-content + geography/cek.sx  -> cek-content
        docs-nav-items                          + nav-data.sx       -> docs-nav-items
    """
    if '/' in def_name:
        namespace, local = def_name.rsplit('/', 1)
        # e.g. geography-cek/geography-cek-cek-content -> cek-content
        redundant = namespace + '-'
        trimmed = local[len(redundant):] if local.startswith(redundant) else ''
        return trimmed or local

    # No '/' in the name: try successively shorter file-derived prefixes.
    stem = os.path.splitext(file_rel_path)[0]
    candidates = (
        stem.replace('/', '-').replace('\\', '-') + '-',  # full path as hyphens
        Path(file_rel_path).stem + '-',                   # bare file stem
    )
    for prefix in candidates:
        if def_name.startswith(prefix) and len(def_name) > len(prefix):
            return def_name[len(prefix):]
    return def_name
def extract_form_sources(source, exprs):
    """Extract the original source text of each top-level form.

    Walks *source* tracking paren depth (honoring string literals and
    backslash escapes inside them) to find form boundaries. Full-line
    ';' comments immediately preceding a parenthesized form are attached
    to it; comments before a bare atom are dropped, matching the
    original extraction behavior.

    Returns a list of strings, one per extracted form, aligned
    positionally with *exprs*. It may be shorter than *exprs* if the
    text runs out early; callers fall back to re-serialization then.

    Fixes vs. previous revision: the docstring wrongly claimed tuples of
    ``(source_text, is_comment_block)`` were returned; unused locals
    (``form_start``, the loop index) removed.
    """
    results = []
    pos = 0
    n = len(source)
    for _ in exprs:  # one extraction attempt per parsed expression
        # Collect comment lines immediately preceding the form.
        comment_lines = []
        while pos < n:
            # Skip inter-form whitespace.
            while pos < n and source[pos] in ' \t\r\n':
                pos += 1
            if pos >= n:
                break
            if source[pos] == ';':
                # Comment line: capture through (and including) the newline,
                # then strip trailing whitespace.
                line_start = pos
                while pos < n and source[pos] != '\n':
                    pos += 1
                if pos < n:
                    pos += 1  # consume the newline
                comment_lines.append(source[line_start:pos].rstrip())
                continue
            break  # found the start of an actual form
        if pos >= n:
            break
        if source[pos] == '(':
            # Parenthesized form: scan to the matching ')'.
            depth = 0
            in_string = False
            escape = False
            form_body_start = pos
            while pos < n:
                c = source[pos]
                if escape:
                    escape = False
                elif c == '\\' and in_string:
                    escape = True
                elif c == '"':
                    in_string = not in_string
                elif not in_string:
                    if c == '(':
                        depth += 1
                    elif c == ')':
                        depth -= 1
                        if depth == 0:
                            pos += 1
                            break
                pos += 1
            form_text = source[form_body_start:pos]
            # Re-attach any leading comments to the form's text.
            if comment_lines:
                form_text = '\n'.join(comment_lines) + '\n' + form_text
            results.append(form_text)
        else:
            # Bare atom (symbol, number, ...): read to the next whitespace.
            start = pos
            while pos < n and source[pos] not in ' \t\r\n':
                pos += 1
            results.append(source[start:pos])
    return results
def process_directory(base_dir, dry_run=True):
    """Scan *base_dir* for .sx files and build a split plan.

    Returns a 4-tuple ``(splits, single, no_defs, errors)``:
      splits  -- (source_file, target_file, content, kw, old_name) per new file
      single  -- (source_file, kw, old_name) for files already one-per-file
      no_defs -- relative paths of files containing no definitions
      errors  -- (relative_path, message) for read/parse failures

    All paths in the results are relative to *base_dir*.

    NOTE: *dry_run* is currently unused; this function only plans, and
    main() performs all writes/deletes. Kept for interface compatibility.

    Fixes vs. previous revision: dead ``non_def_idx`` counter removed;
    form classification list built with a comprehension.
    """
    splits = []
    single = []
    no_defs = []
    errors = []
    for root, dirs, files in os.walk(base_dir):
        # Prune cache directories so we never descend into them.
        dirs[:] = [d for d in dirs if d not in ('__pycache__', '.cache', '.pytest_cache')]
        for filename in sorted(files):
            if not filename.endswith('.sx') or filename in SKIP_FILES:
                continue
            filepath = os.path.join(root, filename)
            rel_path = os.path.relpath(filepath, base_dir)
            try:
                with open(filepath, encoding='utf-8') as f:
                    source = f.read()
            except Exception as e:
                errors.append((rel_path, str(e)))
                continue
            try:
                exprs = parse_all(source)
            except Exception as e:
                errors.append((rel_path, f"Parse: {e}"))
                continue
            # Classify every top-level form: definition vs. other.
            all_exprs = [(expr, get_def_info(expr)) for expr in exprs]
            defs = [(expr, info) for expr, info in all_exprs if info]
            non_defs = [expr for expr, info in all_exprs if not info]
            if not defs:
                no_defs.append(rel_path)
                continue
            if len(defs) == 1 and not non_defs:
                # Already one definition per file — keep as-is.
                _, (kw, name) = defs[0]
                single.append((rel_path, kw, name))
                continue
            # Multiple definitions — split into <dir>/<stem>/<local>.sx
            file_stem = Path(filename).stem
            file_dir = os.path.dirname(rel_path)
            target_dir = os.path.join(file_dir, file_stem)
            # Original source text per form (preserves comments/layout).
            form_sources = extract_form_sources(source, exprs)
            # Deduplicate: keep only the LAST definition of each name.
            seen_names = {}
            for idx, (expr, info) in enumerate(all_exprs):
                if info:
                    seen_names[info[1]] = idx
            last_idx_for_name = set(seen_names.values())
            for idx, (expr, info) in enumerate(all_exprs):
                if info is None:
                    continue  # non-definition forms are handled below
                if idx not in last_idx_for_name:
                    continue  # shadowed earlier duplicate — drop it
                kw, name = info
                local = derive_local_name(name, rel_path)
                safe_local = local.replace('/', '-')
                target_file = os.path.join(target_dir, safe_local + '.sx')
                # Prefer original source text; fall back to re-serialization
                # if the extractor came up short for this form.
                if idx < len(form_sources):
                    content = form_sources[idx]
                else:
                    content = serialize(expr, pretty=True)
                splits.append((rel_path, target_file, content, kw, name))
            # Non-definition forms are collected into a single _init.sx.
            if non_defs:
                init_parts = []
                for idx, (expr, info) in enumerate(all_exprs):
                    if info is None:
                        if idx < len(form_sources):
                            init_parts.append(form_sources[idx])
                        else:
                            init_parts.append(serialize(expr, pretty=True))
                if init_parts:
                    init_content = '\n\n'.join(init_parts)
                    init_file = os.path.join(target_dir, '_init.sx')
                    splits.append((rel_path, init_file, init_content, 'init', '_init'))
    return splits, single, no_defs, errors
def main() -> None:
    """CLI entry point: plan (and optionally execute) the one-per-file split.

    With --dry-run, prints the full split plan and exits. Without it,
    creates the new per-definition files, deletes the original grouped
    files, and writes scripts/name-mapping.json (old name -> new path name).
    """
    parser = argparse.ArgumentParser(description="Migrate SX to one-per-file")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--dir", default=None)
    args = parser.parse_args()
    # Run from the project root so the relative dirs below resolve.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    os.chdir(project_root)
    dirs = [args.dir] if args.dir else ["sx/sx", "sx/sxc"]
    all_splits = []
    all_single = []
    all_no_defs = []
    all_errors = []
    for d in dirs:
        if not os.path.isdir(d):
            print(f"Skip {d}")
            continue
        print(f"\n{'='*60}")
        print(f" {d}")
        print(f"{'='*60}")
        splits, single, no_defs, errors = process_directory(d, args.dry_run)
        # Prefix with base dir for full paths
        for s in splits:
            src, tgt, content, kw, name = s
            all_splits.append((d, os.path.join(d, src), os.path.join(d, tgt),
                               content, kw, name))
        for s in single:
            path, kw, name = s
            all_single.append((d, os.path.join(d, path), kw, name))
        all_no_defs.extend(os.path.join(d, p) for p in no_defs)
        all_errors.extend((os.path.join(d, p), e) for p, e in errors)
    # Check conflicts: two definitions mapping to the same target file.
    # Only the first claimant stays in target_map; execution aborts on any.
    target_map = {}
    conflicts = []
    for _, src, tgt, content, kw, name in all_splits:
        if tgt in target_map:
            conflicts.append((tgt, target_map[tgt], (kw, name, src)))
        else:
            target_map[tgt] = (kw, name, src)
    # Group splits by source file (used for reporting and for deletion).
    by_source = {}
    for _, src, tgt, content, kw, name in all_splits:
        by_source.setdefault(src, []).append((tgt, kw, name))
    # Report
    if all_errors:
        print(f"\n--- Errors ({len(all_errors)}) ---")
        for p, e in all_errors:
            print(f" {p}: {e}")
    if conflicts:
        print(f"\n--- {len(conflicts)} Conflicts ---")
        for tgt, existing, new in conflicts:
            print(f" {tgt}")
            print(f" existing: {existing[1]} from {existing[2]}")
            print(f" new: {new[1]} from {new[2]}")
    total_new = len(all_splits)
    print(f"\n{'='*60}")
    print(f" Summary")
    print(f"{'='*60}")
    print(f" Files to split: {len(by_source)}")
    print(f" New files: {total_new}")
    print(f" Single-def (keep): {len(all_single)}")
    print(f" No-defs (skip): {len(all_no_defs)}")
    print(f" Conflicts: {len(conflicts)}")
    if args.dry_run:
        print(f"\n--- Split plan ---")
        for src in sorted(by_source.keys()):
            targets = by_source[src]
            # NOTE(review): no separator between {src} and the count —
            # output reads "path5 files:"; possibly a lost arrow. Confirm
            # intent before changing the format string.
            print(f"\n {src}{len(targets)} files:")
            for tgt, kw, name in sorted(targets):
                print(f" {tgt} ({kw})")
        print(f"\n--- Single-def files ---")
        for _, path, kw, name in sorted(all_single)[:15]:
            print(f" {path} ({kw} {name})")
        if len(all_single) > 15:
            print(f" ... and {len(all_single) - 15} more")
        # Show a sample of the first new file's content (first 15 lines).
        if all_splits:
            _, src, tgt, content, kw, name = all_splits[0]
            print(f"\n--- Sample: {tgt} ---")
            lines = content.split('\n')
            for line in lines[:15]:
                print(f" {line}")
            if len(lines) > 15:
                print(f" ... ({len(lines)} lines total)")
        print(f"\nDry run. Run without --dry-run to execute.")
        return
    # Execute
    if conflicts:
        print("Aborting due to conflicts.")
        sys.exit(1)
    created = 0
    for _, src, tgt, content, kw, name in all_splits:
        os.makedirs(os.path.dirname(tgt), exist_ok=True)
        # Never clobber an existing file — skip and report instead.
        if os.path.exists(tgt):
            print(f" SKIP (exists): {tgt}")
            continue
        with open(tgt, 'w', encoding='utf-8') as f:
            # Normalize to exactly one trailing newline.
            f.write(content.rstrip() + '\n')
        created += 1
    # Delete source files (only those that were actually split).
    deleted = 0
    for src in by_source:
        if os.path.exists(src):
            os.remove(src)
            deleted += 1
    print(f"\n Created {created}, deleted {deleted} source files.")
    # Build name mapping: old_name -> new_path_name (for callers to rewrite
    # references). _init/preamble pseudo-entries are excluded.
    mapping = {}
    for _, src, tgt, _, kw, old_name in all_splits:
        if kw in ('init', 'preamble'):
            continue
        # Determine which base_dir this target is in to relativize the name.
        for d in dirs:
            if tgt.startswith(d + '/'):
                new_name = os.path.splitext(os.path.relpath(tgt, d))[0]
                if old_name != new_name:
                    mapping[old_name] = new_name
                break
    mapping_file = "scripts/name-mapping.json"
    with open(mapping_file, 'w') as f:
        json.dump(mapping, f, indent=2, sort_keys=True)
    print(f" Name mapping: {mapping_file} ({len(mapping)} entries)")
if __name__ == "__main__":
main()