#!/usr/bin/env python3 """ Re-convert sx_content from lexical JSON to eliminate ~kg-html wrappers and raw caption strings. The updated lexical_to_sx converter now produces native sx expressions instead of (1) wrapping HTML/markdown cards in (~kg-html :html "...") and (2) storing captions as escaped HTML strings. This script re-runs the conversion on all posts that already have sx_content, overwriting the old output. Usage: cd blog && python3 scripts/migrate_sx_html.py [--dry-run] """ from __future__ import annotations import argparse import asyncio import sys from sqlalchemy import select, and_ async def migrate(dry_run: bool = False) -> int: from shared.db.session import get_session from models.ghost_content import Post from bp.blog.ghost.lexical_to_sx import lexical_to_sx converted = 0 skipped = 0 errors = 0 async with get_session() as sess: # All posts with lexical content (whether or not sx_content exists) stmt = select(Post).where( and_( Post.lexical.isnot(None), Post.lexical != "", ) ) result = await sess.execute(stmt) posts = result.scalars().all() print(f"Found {len(posts)} posts with lexical content") for post in posts: try: new_sx = lexical_to_sx(post.lexical) if post.sx_content == new_sx: skipped += 1 continue if dry_run: old_has_kg = "~kg-html" in (post.sx_content or "") old_has_raw = "raw! caption" in (post.sx_content or "") markers = [] if old_has_kg: markers.append("~kg-html") if old_has_raw: markers.append("raw-caption") tag = f" [{', '.join(markers)}]" if markers else "" print(f" [DRY RUN] {post.slug}: {len(new_sx)} chars{tag}") else: post.sx_content = new_sx print(f" Converted: {post.slug} ({len(new_sx)} chars)") converted += 1 except Exception as e: print(f" ERROR: {post.slug}: {e}", file=sys.stderr) errors += 1 if not dry_run and converted: await sess.commit() print(f"\nDone: {converted} converted, {skipped} unchanged, {errors} errors") return converted def main(): parser = argparse.ArgumentParser( description="Re-convert sx_content to eliminate ~kg-html and raw captions" ) parser.add_argument("--dry-run", action="store_true", help="Preview changes without writing to database") args = parser.parse_args() asyncio.run(migrate(dry_run=args.dry_run)) if __name__ == "__main__": main()