Files
mono/l2/migrate.py
2026-02-24 23:07:31 +00:00

246 lines
7.9 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Migration script: JSON files to PostgreSQL.
Usage:
python migrate.py [--dry-run]
Migrates:
- users.json -> users table
- registry.json -> assets table
- activities.json -> activities table
- followers.json -> followers table
Does NOT migrate:
- keys/ directory (stays as files)
"""
import asyncio
import json
import os
import sys
from pathlib import Path
from datetime import datetime, timezone
from uuid import UUID
import asyncpg
# Configuration
DATA_DIR = Path(os.environ.get("ARTDAG_DATA", str(Path.home() / ".artdag" / "l2")))
DATABASE_URL = os.environ.get("DATABASE_URL")
if not DATABASE_URL:
raise RuntimeError("DATABASE_URL environment variable is required")
SCHEMA = """
-- Drop existing tables (careful in production!)
DROP TABLE IF EXISTS followers CASCADE;
DROP TABLE IF EXISTS activities CASCADE;
DROP TABLE IF EXISTS assets CASCADE;
DROP TABLE IF EXISTS users CASCADE;
-- Users table
CREATE TABLE users (
username VARCHAR(255) PRIMARY KEY,
password_hash VARCHAR(255) NOT NULL,
email VARCHAR(255),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Assets table
CREATE TABLE assets (
name VARCHAR(255) PRIMARY KEY,
content_hash VARCHAR(128) NOT NULL,
asset_type VARCHAR(50) NOT NULL,
tags JSONB DEFAULT '[]'::jsonb,
metadata JSONB DEFAULT '{}'::jsonb,
url TEXT,
provenance JSONB,
description TEXT,
origin JSONB,
owner VARCHAR(255) NOT NULL REFERENCES users(username),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ
);
-- Activities table
CREATE TABLE activities (
activity_id UUID PRIMARY KEY,
activity_type VARCHAR(50) NOT NULL,
actor_id TEXT NOT NULL,
object_data JSONB NOT NULL,
published TIMESTAMPTZ NOT NULL,
signature JSONB
);
-- Followers table
CREATE TABLE followers (
id SERIAL PRIMARY KEY,
username VARCHAR(255) NOT NULL REFERENCES users(username),
acct VARCHAR(255) NOT NULL,
url TEXT NOT NULL,
public_key TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(username, acct)
);
-- Indexes
CREATE INDEX idx_users_created_at ON users(created_at);
CREATE INDEX idx_assets_content_hash ON assets(content_hash);
CREATE INDEX idx_assets_owner ON assets(owner);
CREATE INDEX idx_assets_created_at ON assets(created_at DESC);
CREATE INDEX idx_assets_tags ON assets USING GIN(tags);
CREATE INDEX idx_activities_actor_id ON activities(actor_id);
CREATE INDEX idx_activities_published ON activities(published DESC);
CREATE INDEX idx_followers_username ON followers(username);
"""
async def migrate(dry_run: bool = False):
"""Run the migration."""
print(f"Migrating from {DATA_DIR} to PostgreSQL")
print(f"Database: {DATABASE_URL}")
print(f"Dry run: {dry_run}")
print()
# Load JSON files
users = load_json(DATA_DIR / "users.json") or {}
registry = load_json(DATA_DIR / "registry.json") or {"assets": {}}
activities_data = load_json(DATA_DIR / "activities.json") or {"activities": []}
followers = load_json(DATA_DIR / "followers.json") or []
assets = registry.get("assets", {})
activities = activities_data.get("activities", [])
print(f"Found {len(users)} users")
print(f"Found {len(assets)} assets")
print(f"Found {len(activities)} activities")
print(f"Found {len(followers)} followers")
print()
if dry_run:
print("DRY RUN - no changes made")
return
# Connect and migrate
conn = await asyncpg.connect(DATABASE_URL)
try:
# Create schema
print("Creating schema...")
await conn.execute(SCHEMA)
# Migrate users
print("Migrating users...")
for username, user_data in users.items():
await conn.execute(
"""INSERT INTO users (username, password_hash, email, created_at)
VALUES ($1, $2, $3, $4)""",
username,
user_data["password_hash"],
user_data.get("email"),
parse_timestamp(user_data.get("created_at"))
)
print(f" Migrated {len(users)} users")
# Migrate assets
print("Migrating assets...")
for name, asset in assets.items():
await conn.execute(
"""INSERT INTO assets (name, content_hash, asset_type, tags, metadata,
url, provenance, description, origin, owner,
created_at, updated_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)""",
name,
asset["content_hash"],
asset["asset_type"],
json.dumps(asset.get("tags", [])),
json.dumps(asset.get("metadata", {})),
asset.get("url"),
json.dumps(asset.get("provenance")) if asset.get("provenance") else None,
asset.get("description"),
json.dumps(asset.get("origin")) if asset.get("origin") else None,
asset["owner"],
parse_timestamp(asset.get("created_at")),
parse_timestamp(asset.get("updated_at"))
)
print(f" Migrated {len(assets)} assets")
# Migrate activities
print("Migrating activities...")
for activity in activities:
await conn.execute(
"""INSERT INTO activities (activity_id, activity_type, actor_id,
object_data, published, signature)
VALUES ($1, $2, $3, $4, $5, $6)""",
UUID(activity["activity_id"]),
activity["activity_type"],
activity["actor_id"],
json.dumps(activity["object_data"]),
parse_timestamp(activity["published"]),
json.dumps(activity.get("signature")) if activity.get("signature") else None
)
print(f" Migrated {len(activities)} activities")
# Migrate followers
print("Migrating followers...")
if followers and users:
first_user = list(users.keys())[0]
migrated = 0
for follower in followers:
if isinstance(follower, str):
# Old format: just URL string
await conn.execute(
"""INSERT INTO followers (username, acct, url)
VALUES ($1, $2, $3)
ON CONFLICT DO NOTHING""",
first_user,
follower,
follower
)
migrated += 1
elif isinstance(follower, dict):
await conn.execute(
"""INSERT INTO followers (username, acct, url, public_key)
VALUES ($1, $2, $3, $4)
ON CONFLICT DO NOTHING""",
follower.get("username", first_user),
follower.get("acct", follower.get("url", "")),
follower["url"],
follower.get("public_key")
)
migrated += 1
print(f" Migrated {migrated} followers")
else:
print(" No followers to migrate")
print()
print("Migration complete!")
finally:
await conn.close()
def load_json(path: Path) -> dict | list | None:
"""Load JSON file if it exists."""
if path.exists():
with open(path) as f:
return json.load(f)
return None
def parse_timestamp(ts: str | None) -> datetime | None:
"""Parse ISO timestamp string to datetime."""
if not ts:
return datetime.now(timezone.utc)
try:
# Handle various ISO formats
if ts.endswith('Z'):
ts = ts[:-1] + '+00:00'
return datetime.fromisoformat(ts)
except Exception:
return datetime.now(timezone.utc)
if __name__ == "__main__":
dry_run = "--dry-run" in sys.argv
asyncio.run(migrate(dry_run))