#!/usr/bin/env python3
"""Migrate sx_docs components to one-definition-per-file convention.

Reads all .sx files under sx/sx/ and sx/sxc/, splits multi-definition
files into one file per definition.

Usage:
    python3 scripts/migrate_one_per_file.py --dry-run   # preview
    python3 scripts/migrate_one_per_file.py             # execute
"""
import os
import sys
import json
import argparse
from pathlib import Path

# Make the repository root importable so shared.* resolves when this
# script is run directly from scripts/.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from shared.sx.parser import parse_all, serialize
from shared.sx.types import Symbol, Keyword

# Definition heads whose second element is the definition's name symbol.
NAMED_DEFS = {"defcomp", "defisland", "defmacro", "defpage", "defhandler",
              "defstyle", "deftype", "defeffect", "defrelation", "deftest"}
# Files that must never be split.
SKIP_FILES = {"boundary.sx"}


def get_def_info(expr):
    """Return (keyword, name) for a definition form, or None.

    Recognizes the NAMED_DEFS heads (name is expr[1], with any leading
    '~' reactivity markers stripped) and two `define` shapes:
    `(define name ...)` and `(define (name args...) ...)`.
    """
    if not isinstance(expr, list) or not expr:
        return None
    head = expr[0]
    if not isinstance(head, Symbol):
        return None
    kw = head.name
    if kw in NAMED_DEFS:
        if len(expr) < 2 or not isinstance(expr[1], Symbol):
            return None
        return (kw, expr[1].name.lstrip("~"))
    if kw == "define":
        if len(expr) < 2:
            return None
        if isinstance(expr[1], Symbol):
            # (define name value)
            return ("define", expr[1].name)
        elif (isinstance(expr[1], list) and expr[1]
              and isinstance(expr[1][0], Symbol)):
            # (define (name args...) body)
            return ("define", expr[1][0].name)
        return None
    return None


def derive_local_name(def_name, file_rel_path):
    """Derive short filename for a definition within the file's directory.

    Strategy:
    1. If name contains '/', split on LAST '/' -> namespace + local.
       Then strip redundant namespace-as-prefix from local.
    2. If no '/', try stripping the file's full path (as hyphens) prefix.
    3. Else use the full name.

    Examples:
        name: examples/card                    file: examples.sx    -> card
        name: layouts/doc                      file: layouts.sx     -> doc
        name: reactive-islands-demo/example-counter
              file: reactive-islands/demo.sx                        -> example-counter
        name: geography-cek/geography-cek-cek-content
              file: geography/cek.sx                                -> cek-content
        name: docs-nav-items                   file: nav-data.sx    -> docs-nav-items
    """
    if '/' in def_name:
        namespace, local = def_name.rsplit('/', 1)
        # Strip redundant namespace prefix from local part,
        # e.g. geography-cek/geography-cek-cek-content -> cek-content.
        ns_prefix = namespace + '-'
        if local.startswith(ns_prefix):
            stripped = local[len(ns_prefix):]
            if stripped:  # never return an empty filename
                return stripped
        return local

    # No '/' in name — try stripping the file's path-as-hyphens prefix.
    stem = os.path.splitext(file_rel_path)[0]
    path_prefix = stem.replace('/', '-').replace('\\', '-') + '-'
    if def_name.startswith(path_prefix):
        remainder = def_name[len(path_prefix):]
        if remainder:
            return remainder

    # Fall back to just the file stem as a prefix.
    file_stem = Path(file_rel_path).stem
    stem_prefix = file_stem + '-'
    if def_name.startswith(stem_prefix):
        remainder = def_name[len(stem_prefix):]
        if remainder:
            return remainder

    return def_name


def extract_form_sources(source, exprs):
    """Extract original source text for each top-level form.

    Walks the source text tracking paren depth to find form boundaries.
    Returns a list of strings, one per expression in `exprs`; each entry
    is the form's verbatim source with any immediately preceding `;`
    comment lines prepended (paren forms only — comments before bare
    atoms are dropped).
    """
    results = []
    pos = 0
    n = len(source)
    for _ in range(len(exprs)):
        # Collect leading whitespace and comments before the next form.
        comment_lines = []
        while pos < n:
            # Skip whitespace
            while pos < n and source[pos] in ' \t\r\n':
                pos += 1
            if pos >= n:
                break
            if source[pos] == ';':
                # Whole-line comment: capture through the newline.
                line_start = pos
                while pos < n and source[pos] != '\n':
                    pos += 1
                if pos < n:
                    pos += 1  # skip newline
                comment_lines.append(source[line_start:pos].rstrip())
                continue
            # Found start of form
            break
        if pos >= n:
            break

        # Extract the form itself.
        if source[pos] == '(':
            depth = 0
            in_string = False
            escape = False
            form_body_start = pos
            while pos < n:
                c = source[pos]
                if escape:
                    escape = False
                elif c == '\\' and in_string:
                    escape = True  # next char is escaped inside a string
                elif c == '"':
                    in_string = not in_string
                elif not in_string:
                    if c == '(':
                        depth += 1
                    elif c == ')':
                        depth -= 1
                        if depth == 0:
                            pos += 1  # include the closing paren
                            break
                pos += 1
            form_text = source[form_body_start:pos]
            # Include preceding comments so they travel with the form.
            if comment_lines:
                full_text = '\n'.join(comment_lines) + '\n' + form_text
            else:
                full_text = form_text
            results.append(full_text)
        else:
            # Non-paren form (symbol, etc.) — read to next whitespace.
            start = pos
            while pos < n and source[pos] not in ' \t\r\n':
                pos += 1
            results.append(source[start:pos])
    return results


def process_directory(base_dir, dry_run=True):
    """Process all .sx files under base_dir, return the split plan.

    Args:
        base_dir: directory to walk for .sx files.
        dry_run: accepted for interface compatibility; this function only
            computes the plan and never writes, so the flag is unused here.

    Returns:
        (splits, single, no_defs, errors) where
        splits:  list of (source_file, target_file, content, kw, old_name)
        single:  list of (source_file, kw, old_name) — files kept as-is
        no_defs: list of files containing no definitions
        errors:  list of (file, error_message)
    """
    splits = []   # (source_file, target_file, content, kw, old_name)
    single = []   # (source_file, kw, old_name)
    no_defs = []  # files with no definitions
    errors = []

    for root, dirs, files in os.walk(base_dir):
        # Prune tool/cache directories in place so os.walk skips them.
        dirs[:] = [d for d in dirs
                   if d not in ('__pycache__', '.cache', '.pytest_cache')]
        for filename in sorted(files):
            if not filename.endswith('.sx') or filename in SKIP_FILES:
                continue
            filepath = os.path.join(root, filename)
            rel_path = os.path.relpath(filepath, base_dir)

            try:
                with open(filepath, encoding='utf-8') as f:
                    source = f.read()
            except Exception as e:
                errors.append((rel_path, str(e)))
                continue
            try:
                exprs = parse_all(source)
            except Exception as e:
                errors.append((rel_path, f"Parse: {e}"))
                continue

            # Classify forms into definitions and everything else.
            defs = []
            non_defs = []
            for expr in exprs:
                info = get_def_info(expr)
                if info:
                    defs.append((expr, info))
                else:
                    non_defs.append(expr)

            if not defs:
                no_defs.append(rel_path)
                continue
            if len(defs) == 1 and not non_defs:
                # Single definition — stays as-is.
                _, (kw, name) = defs[0]
                single.append((rel_path, kw, name))
                continue

            # Multiple definitions — split into <file_dir>/<file_stem>/.
            file_stem = Path(filename).stem
            file_dir = os.path.dirname(rel_path)
            target_dir = os.path.join(file_dir, file_stem)

            # Get original source for each form (keeps comments/layout).
            form_sources = extract_form_sources(source, exprs)
            all_exprs = [(expr, get_def_info(expr)) for expr in exprs]

            # Deduplicate: keep only the LAST definition for each name.
            seen_names = {}
            for idx, (expr, info) in enumerate(all_exprs):
                if info:
                    seen_names[info[1]] = idx
            last_idx_for_name = set(seen_names.values())

            for idx, (expr, info) in enumerate(all_exprs):
                if info is None:
                    # Non-def form — handled below via _init.sx.
                    continue
                if idx not in last_idx_for_name:
                    # Earlier duplicate — skip.
                    continue
                kw, name = info
                local = derive_local_name(name, rel_path)
                safe_local = local.replace('/', '-')
                target_file = os.path.join(target_dir, safe_local + '.sx')
                # Use original source if available, else re-serialize.
                if idx < len(form_sources):
                    content = form_sources[idx]
                else:
                    content = serialize(expr, pretty=True)
                splits.append((rel_path, target_file, content, kw, name))

            # Non-def forms: collect into _init.sx in original order.
            if non_defs:
                init_parts = []
                for idx, (expr, info) in enumerate(all_exprs):
                    if info is None:
                        if idx < len(form_sources):
                            init_parts.append(form_sources[idx])
                        else:
                            init_parts.append(serialize(expr, pretty=True))
                if init_parts:
                    init_content = '\n\n'.join(init_parts)
                    init_file = os.path.join(target_dir, '_init.sx')
                    splits.append((rel_path, init_file, init_content,
                                   'init', '_init'))

    return splits, single, no_defs, errors


def main():
    """CLI entry point: plan (and optionally execute) the migration."""
    parser = argparse.ArgumentParser(description="Migrate SX to one-per-file")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--dir", default=None)
    args = parser.parse_args()

    # Run from the repository root so the default dirs resolve.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    os.chdir(project_root)

    dirs = [args.dir] if args.dir else ["sx/sx", "sx/sxc"]

    all_splits = []
    all_single = []
    all_no_defs = []
    all_errors = []
    for d in dirs:
        if not os.path.isdir(d):
            print(f"Skip {d}")
            continue
        print(f"\n{'='*60}")
        print(f"  {d}")
        print(f"{'='*60}")
        splits, single, no_defs, errors = process_directory(d, args.dry_run)
        # Prefix with base dir for full paths.
        for src, tgt, content, kw, name in splits:
            all_splits.append((d, os.path.join(d, src), os.path.join(d, tgt),
                               content, kw, name))
        for path, kw, name in single:
            all_single.append((d, os.path.join(d, path), kw, name))
        all_no_defs.extend(os.path.join(d, p) for p in no_defs)
        all_errors.extend((os.path.join(d, p), e) for p, e in errors)

    # Check conflicts: two definitions mapping to the same target file.
    target_map = {}
    conflicts = []
    for _, src, tgt, content, kw, name in all_splits:
        if tgt in target_map:
            conflicts.append((tgt, target_map[tgt], (kw, name, src)))
        else:
            target_map[tgt] = (kw, name, src)

    # Group splits by source file.
    by_source = {}
    for _, src, tgt, content, kw, name in all_splits:
        by_source.setdefault(src, []).append((tgt, kw, name))

    # Report
    if all_errors:
        print(f"\n--- Errors ({len(all_errors)}) ---")
        for p, e in all_errors:
            print(f"  {p}: {e}")
    if conflicts:
        print(f"\n--- {len(conflicts)} Conflicts ---")
        for tgt, existing, new in conflicts:
            print(f"  {tgt}")
            print(f"    existing: {existing[1]} from {existing[2]}")
            print(f"    new:      {new[1]} from {new[2]}")

    total_new = len(all_splits)
    print(f"\n{'='*60}")
    print("  Summary")
    print(f"{'='*60}")
    print(f"  Files to split:     {len(by_source)}")
    print(f"  New files:          {total_new}")
    print(f"  Single-def (keep):  {len(all_single)}")
    print(f"  No-defs (skip):     {len(all_no_defs)}")
    print(f"  Conflicts:          {len(conflicts)}")

    if args.dry_run:
        print("\n--- Split plan ---")
        for src in sorted(by_source.keys()):
            targets = by_source[src]
            print(f"\n  {src} → {len(targets)} files:")
            for tgt, kw, name in sorted(targets):
                print(f"    {tgt} ({kw})")
        print("\n--- Single-def files ---")
        for _, path, kw, name in sorted(all_single)[:15]:
            print(f"  {path} ({kw} {name})")
        if len(all_single) > 15:
            print(f"  ... and {len(all_single) - 15} more")
        # Show a sample of the first planned file's content.
        if all_splits:
            _, src, tgt, content, kw, name = all_splits[0]
            print(f"\n--- Sample: {tgt} ---")
            lines = content.split('\n')
            for line in lines[:15]:
                print(f"  {line}")
            if len(lines) > 15:
                print(f"  ... ({len(lines)} lines total)")
        print("\nDry run. Run without --dry-run to execute.")
        return

    # Execute
    if conflicts:
        print("Aborting due to conflicts.")
        sys.exit(1)

    created = 0
    for _, src, tgt, content, kw, name in all_splits:
        os.makedirs(os.path.dirname(tgt), exist_ok=True)
        if os.path.exists(tgt):
            # Never clobber a file that already exists at the target path.
            print(f"  SKIP (exists): {tgt}")
            continue
        with open(tgt, 'w', encoding='utf-8') as f:
            f.write(content.rstrip() + '\n')
        created += 1

    # Delete the original multi-definition source files.
    deleted = 0
    for src in by_source:
        if os.path.exists(src):
            os.remove(src)
            deleted += 1
    print(f"\n  Created {created}, deleted {deleted} source files.")

    # Build name mapping: old_name -> new_path_name (for later reference
    # rewriting). Skip synthetic entries like _init.sx.
    mapping = {}
    for _, src, tgt, _, kw, old_name in all_splits:
        if kw in ('init', 'preamble'):
            continue
        # Determine which base_dir this target is in. Use os.sep because
        # os.path.join produced platform-native separators ('/' check
        # would miss every target on Windows).
        for d in dirs:
            if tgt.startswith(d + os.sep):
                new_name = os.path.splitext(os.path.relpath(tgt, d))[0]
                if old_name != new_name:
                    mapping[old_name] = new_name
                break
    mapping_file = "scripts/name-mapping.json"
    with open(mapping_file, 'w') as f:
        json.dump(mapping, f, indent=2, sort_keys=True)
    print(f"  Name mapping: {mapping_file} ({len(mapping)} entries)")


if __name__ == "__main__":
    main()