Enrich AP posts: Note type, images, hashtags, HTML excerpt
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 52s

- Switch object type from Article to Note (Mastodon first-class support)
- Include title + excerpt as HTML content with "Read more" link
- Feature image + up to 3 inline images as AP attachments
- Post tags as AP Hashtag objects with inline links in content

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-22 09:35:40 +00:00
parent 8cc17e195d
commit 4c44fc64c5

View File

@@ -1,7 +1,9 @@
from __future__ import annotations
import os
import re
import asyncio
from datetime import datetime
from html import escape as html_escape
from typing import Dict, Any, Optional
import httpx
@@ -993,6 +995,81 @@ async def fetch_single_tag_from_ghost(ghost_id: str) -> Optional[dict[str, Any]]
return tags[0] if tags else None
def _build_ap_post_data(post, post_url: str, tag_objs: list) -> dict:
    """Build the ActivityPub ``object_data`` dict for a blog post or page.

    The result always carries ``name``, ``content`` and ``url``; the
    ``attachment`` and ``tag`` keys are added only when there is something
    to put in them.

    Args:
        post: Object exposing ``title``, ``custom_excerpt``, ``excerpt``,
            ``plaintext``, ``feature_image``, ``feature_image_alt`` and
            ``html`` attributes (any of which may be empty/``None``).
        post_url: Canonical URL of the post. It is used for the "Read more"
            link and as the prefix of every tag link (``<post_url>tag/<slug>/``).
        tag_objs: Objects exposing a ``slug`` attribute; may be empty.

    Returns:
        A dict with:
          * ``name``: the post title (or ``""``).
          * ``content``: HTML made of the escaped title, the excerpt split
            into paragraphs on blank lines, a "Read more" link and, when
            tags are present, a paragraph of inline hashtag links.
          * ``url``: ``post_url``.
          * ``attachment`` (optional): up to four ``Image`` entries, the
            feature image first, then distinct inline images in document
            order.
          * ``tag`` (optional): one ``Hashtag`` entry per tag.
    """
    # Function-scope import keeps this block self-contained: only ``escape``
    # is imported from ``html`` at module level.
    from html import unescape as html_unescape

    max_plaintext_chars = 500  # excerpt fallback length when none is set
    max_attachments = 4        # feature image + inline images

    # ---- Content HTML: title + excerpt + "Read more" link -----------------
    parts: list[str] = []
    if post.title:
        parts.append(f"<p>{html_escape(post.title)}</p>")

    excerpt = post.custom_excerpt or post.excerpt or ""
    if not excerpt and post.plaintext:
        excerpt = post.plaintext[:max_plaintext_chars]
        if len(post.plaintext) > max_plaintext_chars:
            # Drop the (probably cut-off) last word and mark the truncation.
            excerpt = excerpt.rsplit(" ", 1)[0] + "\u2026"

    for para in excerpt.split("\n\n"):
        para = para.strip()
        if para:
            parts.append(f"<p>{html_escape(para)}</p>")

    parts.append(f'<p><a href="{html_escape(post_url)}">Read more \u2192</a></p>')

    # ---- Hashtags -----------------------------------------------------------
    # Compute (href, name) once so the inline links and the Hashtag objects
    # can never disagree.
    # NOTE(review): tag links are built relative to the *post* URL; confirm
    # that ``<post_url>tag/<slug>/`` is the intended tag route.
    hashtags = [
        (f"{post_url}tag/{t.slug}/", f"#{t.slug.replace('-', '')}")
        for t in (tag_objs or [])
    ]

    # Hashtag links in content (Mastodon expects them inline too).
    if hashtags:
        ht_links = [
            # The whole href and the link text are escaped, not just the
            # post_url prefix, so an unusual slug cannot break the markup.
            f'<a href="{html_escape(href)}" rel="tag">{html_escape(name)}</a>'
            for href, name in hashtags
        ]
        parts.append(f'<p>{" ".join(ht_links)}</p>')

    obj: dict = {
        "name": post.title or "",
        "content": "\n".join(parts),
        "url": post_url,
    }

    # ---- Attachments: feature image + inline images (max 4) ----------------
    attachments: list[dict] = []
    seen: set[str] = set()
    if post.feature_image:
        att: dict = {"type": "Image", "url": post.feature_image}
        if post.feature_image_alt:
            att["name"] = post.feature_image_alt
        attachments.append(att)
        seen.add(post.feature_image)

    if post.html:
        for match in re.finditer(r'<img[^>]+src="([^"]+)"', post.html):
            if len(attachments) >= max_attachments:
                break  # cap reached; no point scanning the rest
            # Attribute values are HTML-encoded (e.g. ``&amp;`` in query
            # strings); decode so the attachment URL is the real URL and
            # de-duplicates correctly against the feature image.
            src = html_unescape(match.group(1))
            if src not in seen:
                attachments.append({"type": "Image", "url": src})
                seen.add(src)

    if attachments:
        obj["attachment"] = attachments

    # ---- AP Hashtag objects (JSON values, so no HTML escaping here) --------
    if hashtags:
        obj["tag"] = [
            {"type": "Hashtag", "href": href, "name": name}
            for href, name in hashtags
        ]

    return obj
async def sync_single_post(sess: AsyncSession, ghost_id: str) -> None:
gp = await fetch_single_post_from_ghost(ghost_id)
if gp is None:
@@ -1028,6 +1105,7 @@ async def sync_single_post(sess: AsyncSession, ghost_id: str) -> None:
from shared.services.federation_publish import try_publish
from shared.infrastructure.urls import app_url
post_url = app_url("coop", f"/{post.slug}/")
post_tags = [tag_map[t["id"]] for t in (gp.get("tags") or []) if t["id"] in tag_map]
if post.status == "published":
activity_type = "Create" if old_status != "published" else "Update"
@@ -1035,12 +1113,8 @@ async def sync_single_post(sess: AsyncSession, ghost_id: str) -> None:
sess,
user_id=post.user_id,
activity_type=activity_type,
object_type="Article",
object_data={
"name": post.title or "",
"content": post.custom_excerpt or post.excerpt or "",
"url": post_url,
},
object_type="Note",
object_data=_build_ap_post_data(post, post_url, post_tags),
source_type="Post",
source_id=post.id,
)
@@ -1052,7 +1126,7 @@ async def sync_single_post(sess: AsyncSession, ghost_id: str) -> None:
object_type="Tombstone",
object_data={
"id": post_url,
"formerType": "Article",
"formerType": "Note",
},
source_type="Post",
source_id=post.id,
@@ -1096,6 +1170,7 @@ async def sync_single_page(sess: AsyncSession, ghost_id: str) -> None:
from shared.services.federation_publish import try_publish
from shared.infrastructure.urls import app_url
post_url = app_url("coop", f"/{post.slug}/")
post_tags = [tag_map[t["id"]] for t in (gp.get("tags") or []) if t["id"] in tag_map]
if post.status == "published":
activity_type = "Create" if old_status != "published" else "Update"
@@ -1103,12 +1178,8 @@ async def sync_single_page(sess: AsyncSession, ghost_id: str) -> None:
sess,
user_id=post.user_id,
activity_type=activity_type,
object_type="Article",
object_data={
"name": post.title or "",
"content": post.custom_excerpt or post.excerpt or "",
"url": post_url,
},
object_type="Note",
object_data=_build_ap_post_data(post, post_url, post_tags),
source_type="Post",
source_id=post.id,
)
@@ -1120,7 +1191,7 @@ async def sync_single_page(sess: AsyncSession, ghost_id: str) -> None:
object_type="Tombstone",
object_data={
"id": post_url,
"formerType": "Article",
"formerType": "Note",
},
source_type="Post",
source_id=post.id,