Scripts: page migration helpers for one-per-file layout
Python + shell tooling used to split grouped index.sx files into one-directory-per-page layout (see the hyperscript gallery migration). name-mapping.json records the rename table; strip_names.py is a helper for extracting component names from .sx sources. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
446
scripts/migrate_one_per_file.py
Normal file
446
scripts/migrate_one_per_file.py
Normal file
@@ -0,0 +1,446 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Migrate sx_docs components to one-definition-per-file convention.
|
||||
|
||||
Reads all .sx files under sx/sx/ and sx/sxc/, splits multi-definition
|
||||
files into one file per definition.
|
||||
|
||||
Usage:
|
||||
python3 scripts/migrate_one_per_file.py --dry-run # preview
|
||||
python3 scripts/migrate_one_per_file.py # execute
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from shared.sx.parser import parse_all, serialize
|
||||
from shared.sx.types import Symbol, Keyword
|
||||
|
||||
# S-expression head symbols that introduce a *named* definition:
# forms shaped like (defX name ...), where the second element is the
# definition's name (a Symbol).
NAMED_DEFS = {"defcomp", "defisland", "defmacro", "defpage",
              "defhandler", "defstyle", "deftype", "defeffect",
              "defrelation", "deftest"}

# Files excluded from migration entirely (left in place untouched).
SKIP_FILES = {"boundary.sx"}
|
||||
|
||||
|
||||
def get_def_info(expr):
    """Classify a top-level form.

    Returns a (keyword, name) tuple when *expr* is a named definition —
    either one of NAMED_DEFS or a `define` — otherwise None.
    """
    # Must be a non-empty list whose head is a Symbol to be any def form.
    if not (isinstance(expr, list) and expr and isinstance(expr[0], Symbol)):
        return None

    keyword = expr[0].name

    if keyword in NAMED_DEFS:
        # (defX name ...) — name must itself be a Symbol; leading '~'
        # markers are stripped from the recorded name.
        if len(expr) >= 2 and isinstance(expr[1], Symbol):
            return (keyword, expr[1].name.lstrip("~"))
        return None

    if keyword == "define" and len(expr) >= 2:
        target = expr[1]
        # (define name ...) — plain variable definition.
        if isinstance(target, Symbol):
            return ("define", target.name)
        # (define (name args...) ...) — function-style definition.
        if isinstance(target, list) and target and isinstance(target[0], Symbol):
            return ("define", target[0].name)

    return None
|
||||
|
||||
|
||||
def derive_local_name(def_name, file_rel_path):
    """Derive the short filename for a definition within its file's directory.

    Strategy (first match wins):
      1. Name contains '/': split on the LAST '/'; if the local part
         redundantly repeats the namespace as a hyphen prefix, drop it.
      2. No '/': strip the file's full relative path (joined with
         hyphens) as a prefix, if present.
      3. Otherwise strip just the bare file stem as a prefix.
      4. Otherwise return the full name unchanged.

    A prefix is only stripped when something non-empty remains.

    Examples:
        examples/card            + examples.sx              -> card
        layouts/doc              + layouts.sx               -> doc
        reactive-islands-demo/example-counter
                                 + reactive-islands/demo.sx -> example-counter
        geography-cek/geography-cek-cek-content
                                 + geography/cek.sx         -> cek-content
        docs-nav-items           + nav-data.sx              -> docs-nav-items
    """
    if '/' in def_name:
        namespace, _, local = def_name.rpartition('/')
        # e.g. geography-cek/geography-cek-cek-content -> cek-content
        redundant = namespace + '-'
        if local.startswith(redundant) and len(local) > len(redundant):
            return local[len(redundant):]
        return local

    # No namespace — try progressively shorter prefixes derived from the
    # file's path: full path-as-hyphens first, then just the stem.
    stem = os.path.splitext(file_rel_path)[0]
    candidate_prefixes = (
        stem.replace('/', '-').replace('\\', '-') + '-',
        Path(file_rel_path).stem + '-',
    )
    for prefix in candidate_prefixes:
        if def_name.startswith(prefix) and len(def_name) > len(prefix):
            return def_name[len(prefix):]

    return def_name
|
||||
|
||||
|
||||
def extract_form_sources(source, exprs):
    """Extract the original source text for each top-level form.

    Walks *source* tracking paren depth (string-literal and escape aware)
    to find form boundaries. Returns a list of strings, one per expression
    in *exprs*, in order; `;` line comments immediately preceding a
    parenthesized form are included in that form's text.

    Only len(exprs) is used — it bounds how many forms are read.
    """
    results = []
    pos = 0
    n = len(source)

    for _ in range(len(exprs)):
        # Collect leading comments (whitespace between forms is dropped).
        comment_lines = []

        while pos < n:
            # Skip whitespace
            while pos < n and source[pos] in ' \t\r\n':
                pos += 1
            if pos >= n:
                break

            if source[pos] == ';':
                # Line comment: capture through end of line.
                line_start = pos
                while pos < n and source[pos] != '\n':
                    pos += 1
                if pos < n:
                    pos += 1  # skip newline
                comment_lines.append(source[line_start:pos].rstrip())
                continue

            # Found start of form
            break

        if pos >= n:
            break

        # Extract the form
        if source[pos] == '(':
            # Parenthesized form: scan to the matching close paren,
            # ignoring parens inside string literals.
            depth = 0
            in_string = False
            escape = False
            form_body_start = pos

            while pos < n:
                c = source[pos]
                if escape:
                    escape = False
                elif c == '\\' and in_string:
                    escape = True
                elif c == '"':
                    in_string = not in_string
                elif not in_string:
                    if c == '(':
                        depth += 1
                    elif c == ')':
                        depth -= 1
                        if depth == 0:
                            pos += 1
                            break
                pos += 1

            form_text = source[form_body_start:pos]
            # Re-attach preceding comments to the form's text.
            if comment_lines:
                full_text = '\n'.join(comment_lines) + '\n' + form_text
            else:
                full_text = form_text

            results.append(full_text)
        else:
            # Bare atom (symbol, number, ...): read to next whitespace.
            # NOTE(review): comments collected above are dropped for bare
            # atoms — preserved existing behavior; confirm if intentional.
            start = pos
            while pos < n and source[pos] not in ' \t\r\n':
                pos += 1
            results.append(source[start:pos])

    return results
|
||||
|
||||
|
||||
def process_directory(base_dir, dry_run=True):
    """Scan all .sx files under *base_dir* and build a split plan.

    Returns a 4-tuple:
      splits:  list of (source_rel, target_rel, content, kw, old_name) —
               new files to create from multi-definition sources
      single:  list of (source_rel, kw, name) — files already holding
               exactly one definition and nothing else; left in place
      no_defs: list of rel paths containing no definitions (skipped)
      errors:  list of (rel_path, message) for read/parse failures

    *dry_run* is accepted for interface compatibility but is unused here —
    this function only plans; main() decides whether to execute.
    """
    splits = []
    single = []
    no_defs = []
    errors = []

    for root, dirs, files in os.walk(base_dir):
        # Don't descend into tool/cache directories.
        dirs[:] = [d for d in dirs if d not in ('__pycache__', '.cache', '.pytest_cache')]

        for filename in sorted(files):
            if not filename.endswith('.sx') or filename in SKIP_FILES:
                continue

            filepath = os.path.join(root, filename)
            rel_path = os.path.relpath(filepath, base_dir)

            try:
                with open(filepath, encoding='utf-8') as f:
                    source = f.read()
            except Exception as e:
                errors.append((rel_path, str(e)))
                continue

            try:
                exprs = parse_all(source)
            except Exception as e:
                errors.append((rel_path, f"Parse: {e}"))
                continue

            # Classify top-level forms into definitions vs everything else.
            defs = []
            non_defs = []
            for expr in exprs:
                info = get_def_info(expr)
                if info:
                    defs.append((expr, info))
                else:
                    non_defs.append(expr)

            if not defs:
                no_defs.append(rel_path)
                continue

            if len(defs) == 1 and not non_defs:
                # Already one-definition-per-file — stays as-is.
                _, (kw, name) = defs[0]
                single.append((rel_path, kw, name))
                continue

            # Multiple definitions — split into <dir>/<stem>/<local>.sx.
            file_stem = Path(filename).stem
            file_dir = os.path.dirname(rel_path)
            target_dir = os.path.join(file_dir, file_stem)

            # Original source text per form (preserves formatting/comments).
            form_sources = extract_form_sources(source, exprs)

            all_exprs = [(expr, get_def_info(expr)) for expr in exprs]

            # Deduplicate: for each name, only the LAST definition wins.
            seen_names = {}
            for idx, (expr, info) in enumerate(all_exprs):
                if info:
                    seen_names[info[1]] = idx
            last_idx_for_name = set(seen_names.values())

            for idx, (expr, info) in enumerate(all_exprs):
                if info is None:
                    # Non-def form — collected into _init.sx below.
                    continue
                if idx not in last_idx_for_name:
                    # Earlier (shadowed) duplicate — skip.
                    continue

                kw, name = info
                local = derive_local_name(name, rel_path)
                safe_local = local.replace('/', '-')
                target_file = os.path.join(target_dir, safe_local + '.sx')

                # Prefer original text; fall back to re-serialization.
                if idx < len(form_sources):
                    content = form_sources[idx]
                else:
                    content = serialize(expr, pretty=True)

                splits.append((rel_path, target_file, content, kw, name))

            # Non-def forms: collect into a single _init.sx.
            if non_defs:
                init_parts = []
                for idx, (expr, info) in enumerate(all_exprs):
                    if info is None:
                        if idx < len(form_sources):
                            init_parts.append(form_sources[idx])
                        else:
                            init_parts.append(serialize(expr, pretty=True))

                if init_parts:
                    init_content = '\n\n'.join(init_parts)
                    init_file = os.path.join(target_dir, '_init.sx')
                    splits.append((rel_path, init_file, init_content, 'init', '_init'))

    return splits, single, no_defs, errors
|
||||
|
||||
|
||||
def main():
    """CLI entry point: plan (and optionally execute) the migration.

    Parses --dry-run / --dir, scans the target directories via
    process_directory(), reports conflicts and a summary, then either
    prints the split plan (--dry-run) or creates the new files, deletes
    the old grouped sources, and writes the rename table to
    scripts/name-mapping.json.
    """
    parser = argparse.ArgumentParser(description="Migrate SX to one-per-file")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--dir", default=None)
    args = parser.parse_args()

    # Run from the project root so the default relative dirs resolve.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    os.chdir(project_root)

    dirs = [args.dir] if args.dir else ["sx/sx", "sx/sxc"]

    all_splits = []
    all_single = []
    all_no_defs = []
    all_errors = []

    for d in dirs:
        if not os.path.isdir(d):
            print(f"Skip {d}")
            continue

        print(f"\n{'='*60}")
        print(f" {d}")
        print(f"{'='*60}")

        # NOTE(review): process_directory accepts dry_run but does not
        # act on it — planning only; execution is decided below.
        splits, single, no_defs, errors = process_directory(d, args.dry_run)

        # Prefix with base dir for full paths
        for s in splits:
            src, tgt, content, kw, name = s
            all_splits.append((d, os.path.join(d, src), os.path.join(d, tgt),
                               content, kw, name))
        for s in single:
            path, kw, name = s
            all_single.append((d, os.path.join(d, path), kw, name))
        all_no_defs.extend(os.path.join(d, p) for p in no_defs)
        all_errors.extend((os.path.join(d, p), e) for p, e in errors)

    # Check conflicts: two definitions mapping to the same target file.
    # First writer wins the target_map slot; later ones are recorded.
    target_map = {}
    conflicts = []
    for _, src, tgt, content, kw, name in all_splits:
        if tgt in target_map:
            conflicts.append((tgt, target_map[tgt], (kw, name, src)))
        else:
            target_map[tgt] = (kw, name, src)

    # Group splits by source file
    by_source = {}
    for _, src, tgt, content, kw, name in all_splits:
        by_source.setdefault(src, []).append((tgt, kw, name))

    # Report
    if all_errors:
        print(f"\n--- Errors ({len(all_errors)}) ---")
        for p, e in all_errors:
            print(f" {p}: {e}")

    if conflicts:
        print(f"\n--- {len(conflicts)} Conflicts ---")
        for tgt, existing, new in conflicts:
            print(f" {tgt}")
            print(f" existing: {existing[1]} from {existing[2]}")
            print(f" new: {new[1]} from {new[2]}")

    total_new = len(all_splits)
    print(f"\n{'='*60}")
    print(f" Summary")
    print(f"{'='*60}")
    print(f" Files to split: {len(by_source)}")
    print(f" New files: {total_new}")
    print(f" Single-def (keep): {len(all_single)}")
    print(f" No-defs (skip): {len(all_no_defs)}")
    print(f" Conflicts: {len(conflicts)}")

    if args.dry_run:
        # Preview mode: print the full plan and stop before any writes.
        print(f"\n--- Split plan ---")
        for src in sorted(by_source.keys()):
            targets = by_source[src]
            print(f"\n {src} → {len(targets)} files:")
            for tgt, kw, name in sorted(targets):
                print(f" {tgt} ({kw})")

        # Only the first 15 single-def files are listed, to keep output short.
        print(f"\n--- Single-def files ---")
        for _, path, kw, name in sorted(all_single)[:15]:
            print(f" {path} ({kw} {name})")
        if len(all_single) > 15:
            print(f" ... and {len(all_single) - 15} more")

        # Show a sample of the first planned file's content (15 lines max).
        if all_splits:
            _, src, tgt, content, kw, name = all_splits[0]
            print(f"\n--- Sample: {tgt} ---")
            lines = content.split('\n')
            for line in lines[:15]:
                print(f" {line}")
            if len(lines) > 15:
                print(f" ... ({len(lines)} lines total)")

        print(f"\nDry run. Run without --dry-run to execute.")
        return

    # Execute — refuse to write anything if targets would collide.
    if conflicts:
        print("Aborting due to conflicts.")
        sys.exit(1)

    # Create the new one-definition files; never overwrite existing ones.
    created = 0
    for _, src, tgt, content, kw, name in all_splits:
        os.makedirs(os.path.dirname(tgt), exist_ok=True)
        if os.path.exists(tgt):
            print(f" SKIP (exists): {tgt}")
            continue
        with open(tgt, 'w', encoding='utf-8') as f:
            f.write(content.rstrip() + '\n')
        created += 1

    # Delete source files
    deleted = 0
    for src in by_source:
        if os.path.exists(src):
            os.remove(src)
            deleted += 1

    print(f"\n Created {created}, deleted {deleted} source files.")

    # Build name mapping: old_name -> new_path_name
    # (only entries whose name actually changed; _init files excluded)
    mapping = {}
    for _, src, tgt, _, kw, old_name in all_splits:
        if kw in ('init', 'preamble'):
            continue
        # Determine which base_dir this target is in
        for d in dirs:
            if tgt.startswith(d + '/'):
                new_name = os.path.splitext(os.path.relpath(tgt, d))[0]
                if old_name != new_name:
                    mapping[old_name] = new_name
                break

    mapping_file = "scripts/name-mapping.json"
    with open(mapping_file, 'w') as f:
        json.dump(mapping, f, indent=2, sort_keys=True)
    print(f" Name mapping: {mapping_file} ({len(mapping)} entries)")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user