From e32f5bbb42a15970cb7a135420708028ef150914 Mon Sep 17 00:00:00 2001 From: Jiwon Kwon Date: Tue, 23 Jun 2026 19:13:43 +0900 Subject: [PATCH 1/3] Add documentation Add changes and package descriptions. Fix version to 2.3.0. Assisted-by: Codex:gpt-5.5 --- CHANGES.md | 13 ++++++++++ CONTRIBUTING.md | 2 ++ packages/backfill/README.md | 43 ++++++++++++++++++++++++++++++- packages/backfill/src/backfill.ts | 4 +-- packages/backfill/src/types.ts | 17 ++++++------ 5 files changed, 67 insertions(+), 12 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 9398d5286..e7c2b6fab 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -221,6 +221,19 @@ To be released. [#771]: https://github.com/fedify-dev/fedify/pull/771 [#772]: https://github.com/fedify-dev/fedify/pull/772 +### @fedify/backfill + + - Added *@fedify/backfill* for reconstructing ActivityPub conversations. + It supports FEP-f228 context collections containing post-like objects or + `Create` activities, optional reply-tree traversal, ordered hybrid + strategies, shared safety budgets, deduplication, and traversal-local + document caching. [[#275], [#779], [#801], [#807] by Jiwon Kwon] + +[#275]: https://github.com/fedify-dev/fedify/issues/275 +[#779]: https://github.com/fedify-dev/fedify/pull/779 +[#801]: https://github.com/fedify-dev/fedify/pull/801 +[#807]: https://github.com/fedify-dev/fedify/pull/807 + ### @fedify/fixture - Added `createTestMeterProvider()` and `TestMetricRecorder` helpers for diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9c421d48f..21327fa4e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -407,6 +407,8 @@ The repository is organized as a monorepo with the following packages: creating new Fedify projects. Wraps @fedify/init. - *packages/amqp/*: AMQP/RabbitMQ driver (@fedify/amqp) for Fedify. - *packages/astro/*: Astro integration (@fedify/astro) for Fedify. + - *packages/backfill/*: ActivityPub conversation backfill support + (@fedify/backfill) for Fedify. 
- *packages/cfworkers/*: Cloudflare Workers integration (@fedify/cfworkers) for Fedify. - *packages/debugger/*: Embedded ActivityPub debug dashboard diff --git a/packages/backfill/README.md b/packages/backfill/README.md index cd1e5f33b..a9e4e06c5 100644 --- a/packages/backfill/README.md +++ b/packages/backfill/README.md @@ -11,7 +11,7 @@ This package provides ActivityPub conversation backfill support for the [Fedify] ecosystem. It can retrieve post-like objects from a seed object's -context collection, following the direct FEP-f228-style path where the +context collection, following the direct [FEP-f228] path where the context dereferences to a `Collection`, `OrderedCollection`, `CollectionPage`, or `OrderedCollectionPage`. It can also use an opt-in reply-tree strategy to walk `inReplyTo` ancestors and `replies` descendants when context collections @@ -24,6 +24,7 @@ are unavailable or incomplete. [@fedify@hollo.social badge]: https://fedi-badge.deno.dev/@fedify@hollo.social/followers.svg [@fedify@hollo.social]: https://hollo.social/@fedify [Fedify]: https://fedify.dev/ +[FEP-f228]: https://w3id.org/fep/f228 Installation @@ -73,6 +74,19 @@ collection items are treated as backfillable objects by default. If an item is recognized as a supported `Create` activity, `backfill()` extracts the activity's object instead. +To accept only post-like objects directly contained in the context collection, +use the `context-objects` strategy: + +~~~~ typescript +for await ( + const item of backfill({ documentLoader }, note, { + strategies: ["context-objects"], + }) +) { + console.log(item.object); +} +~~~~ + To read only FEP-f228 activity collections, enable the `context-activities` strategy: @@ -109,3 +123,30 @@ objects from Activity wrappers. Immediate parents and direct replies have depth 1, their next-level parents or replies have depth 2, and so on. Reply-tree traversal defaults to a maximum depth of 10; set `maxDepth` to use a different limit. 
+ + +Traversal controls +------------------ + +All configured strategies share the same traversal controls: + + - `maxItems` limits the number of yielded objects. Skipped duplicates do + not count. + - `maxRequests` limits calls to `documentLoader`. Embedded objects and + collections do not count. + - `maxDepth` limits reply-tree traversal and defaults to 10. It does not + limit context collection items. + - `interval` adds a delay between loader requests. Its callback receives + the zero-based request index. + - `signal` cancels traversal and is forwarded to `documentLoader`. + +An `interval` string requires the global `Temporal` API or a polyfill. +`Temporal.DurationLike` objects work without the global API. + +If the seed has no context, or its context resolves to a non-collection, +context strategies yield nothing. Loader failures are skipped unless +traversal is aborted. + +Dereferenced documents are cached in memory for one `backfill()` traversal. +Applications that need persistent or shared caching can provide it through +the `documentLoader`. diff --git a/packages/backfill/src/backfill.ts b/packages/backfill/src/backfill.ts index 02cb32319..a836f4c4a 100644 --- a/packages/backfill/src/backfill.ts +++ b/packages/backfill/src/backfill.ts @@ -26,7 +26,7 @@ const DEFAULT_MAX_DEPTH = 10; /** * Thrown when backfill traversal exceeds the configured request budget. * - * @since 2.x.0 + * @since 2.3.0 */ export class MaxRequestsExceeded extends Error {} @@ -57,7 +57,7 @@ type ReplyTreeTraversal = { * The seed object is not yielded by default, but its ID is treated as already * seen so it will not be yielded again if the collection contains it. 
* - * @since 2.x.0 + * @since 2.3.0 */ export async function* backfill< TObject extends APObject = APObject, diff --git a/packages/backfill/src/types.ts b/packages/backfill/src/types.ts index c7d15f80d..58090e698 100644 --- a/packages/backfill/src/types.ts +++ b/packages/backfill/src/types.ts @@ -13,7 +13,7 @@ import type { Object as APObject } from "@fedify/vocab"; * - `"reply-tree"` walks the reply graph through `inReplyTo` ancestors and * `replies` descendants, yielding discovered post-like objects. * - * @since 2.x.0 + * @since 2.3.0 */ export type BackfillStrategy = | "context-objects" @@ -24,10 +24,9 @@ export type BackfillStrategy = /** * Source relation that produced a backfilled object. * - * @since 2.x.0 + * @since 2.3.0 */ export type BackfillOrigin = - | "context" | "collection" | "in-reply-to" | "replies"; @@ -35,7 +34,7 @@ export type BackfillOrigin = /** * Options passed to {@link BackfillDocumentLoader}. * - * @since 2.x.0 + * @since 2.3.0 */ export interface BackfillDocumentLoaderOptions { /** @@ -47,7 +46,7 @@ export interface BackfillDocumentLoaderOptions { /** * Dereferences an ActivityPub object or collection IRI. * - * @since 2.x.0 + * @since 2.3.0 */ export type BackfillDocumentLoader = ( iri: URL, @@ -57,7 +56,7 @@ export type BackfillDocumentLoader = ( /** * Dependencies used by backfill traversal. * - * @since 2.x.0 + * @since 2.3.0 */ export interface BackfillContext { /** @@ -70,7 +69,7 @@ export interface BackfillContext { /** * Controls backfill traversal. * - * @since 2.x.0 + * @since 2.3.0 */ export interface BackfillOptions< TObject extends APObject = APObject, @@ -86,7 +85,7 @@ export interface BackfillOptions< * If `"context-auto"` is included, it absorbs other context collection * strategies. * - * @since 2.x.0 + * @since 2.3.0 */ readonly strategies?: readonly BackfillStrategy[]; @@ -134,7 +133,7 @@ export interface BackfillOptions< /** * A single object discovered by backfill traversal. 
* - * @since 2.x.0 + * @since 2.3.0 */ export interface BackfillItem< TObject extends APObject = APObject, From c6ef99de518cf1d2e2e82ca63e6cb52e39a39a39 Mon Sep 17 00:00:00 2001 From: Jiwon Kwon Date: Tue, 23 Jun 2026 23:41:45 +0900 Subject: [PATCH 2/3] Add backfill manual page Publish the @fedify/backfill guide on the documentation website with installation commands, strategy examples, traversal controls, and cache and failure behavior. Add the package to the docs workspace so Twoslash can validate the examples. Assisted-by: Codex:gpt-5.5 --- docs/.vitepress/config.mts | 1 + docs/manual/backfill.md | 203 +++++++++++++++++++++++++++++++++++++ docs/package.json | 1 + pnpm-lock.yaml | 3 + 4 files changed, 208 insertions(+) create mode 100644 docs/manual/backfill.md diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index 5c2a39a32..d58897b1f 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -137,6 +137,7 @@ const MANUAL = { { text: "Outbox listeners", link: "/manual/outbox.md" }, { text: "Sending activities", link: "/manual/send.md" }, { text: "Collections", link: "/manual/collections.md" }, + { text: "Conversation backfill", link: "/manual/backfill.md" }, { text: "Object dispatcher", link: "/manual/object.md" }, { text: "Access control", link: "/manual/access-control.md" }, { text: "WebFinger", link: "/manual/webfinger.md" }, diff --git a/docs/manual/backfill.md b/docs/manual/backfill.md new file mode 100644 index 000000000..74f52280f --- /dev/null +++ b/docs/manual/backfill.md @@ -0,0 +1,203 @@ +--- +description: >- + Reconstruct ActivityPub conversations from FEP-f228 context collections or + reply relationships using the @fedify/backfill package. +--- + +Conversation backfill +===================== + +*This API is available since Fedify 2.3.0.* + +Fedify provides the *@fedify/backfill* package for reconstructing ActivityPub +conversations that may be incomplete on the local server. 
It can retrieve +post-like objects from [FEP-f228] context collections and optionally crawl +`inReplyTo` ancestors and `replies` descendants. + +[FEP-f228]: https://w3id.org/fep/f228 + + +Installation +------------ + +::: code-group + +~~~~ sh [Deno] +deno add jsr:@fedify/backfill +~~~~ + +~~~~ sh [npm] +npm add @fedify/backfill +~~~~ + +~~~~ sh [pnpm] +pnpm add @fedify/backfill +~~~~ + +~~~~ sh [Yarn] +yarn add @fedify/backfill +~~~~ + +~~~~ sh [Bun] +bun add @fedify/backfill +~~~~ + +::: + + +Backfilling a conversation +-------------------------- + +The `backfill()` function accepts a backfill context, a seed object, and +traversal options. The context supplies a `documentLoader` for dereferencing +context collections, collection items, reply targets, and replies collections: + +~~~~ typescript twoslash +import { backfill, type BackfillDocumentLoader } from "@fedify/backfill"; +import { lookupObject, Note } from "@fedify/vocab"; + +declare const note: Note; +// ---cut-before--- +const documentLoader: BackfillDocumentLoader = (iri, options) => + lookupObject(iri, { signal: options?.signal }); + +for await ( + const item of backfill({ documentLoader }, note, { + maxItems: 20, + maxRequests: 50, + }) +) { + console.log(item.id?.href); +} +~~~~ + +The seed object itself is not yielded. If the same object appears in a +discovered collection, it is skipped by ID. + +By default, `backfill()` uses the `"context-auto"` strategy. It expects the +seed's `context` to dereference to a `Collection`, `OrderedCollection`, +`CollectionPage`, or `OrderedCollectionPage`. Ordinary post-like items are +yielded directly, while supported `Create` activities are unwrapped and their +objects are yielded. + +If the seed has no context, or its context resolves to a non-collection, +context strategies yield nothing. + + +Strategies +---------- + +Strategies run in the configured order. They share request and item budgets, +abort state, document caching, and object ID deduplication. 
If multiple +strategies discover the same object, the first one keeps its `BackfillItem` +metadata. + +`"context-auto"` +: Handles both direct post-like objects and supported `Create` activities + from a context collection. This is the default strategy. + +`"context-objects"` +: Accepts only post-like objects contained directly in a context collection: + + ~~~~ typescript twoslash + import { backfill, type BackfillContext } from "@fedify/backfill"; + import { Note } from "@fedify/vocab"; + + declare const context: BackfillContext; + declare const note: Note; + // ---cut-before--- + for await ( + const item of backfill(context, note, { + strategies: ["context-objects"], + }) + ) { + console.log(item.object); + } + ~~~~ + +`"context-activities"` +: Accepts supported activities from a context collection. It currently + supports `Create` and yields the activity's object rather than the activity + itself: + + ~~~~ typescript twoslash + import { backfill, type BackfillContext } from "@fedify/backfill"; + import { Note } from "@fedify/vocab"; + + declare const context: BackfillContext; + declare const note: Note; + // ---cut-before--- + for await ( + const item of backfill(context, note, { + strategies: ["context-activities"], + }) + ) { + console.log(item.object); + } + ~~~~ + +`"reply-tree"` +: Walks `inReplyTo` ancestors and `replies` descendants. It yields + post-like objects only and does not unwrap Activity objects. This strategy + is opt-in because it can require substantially more network requests than + a context collection. 
+ +For hybrid coverage, run the FEP-f228 path first and use reply-tree traversal +after it: + +~~~~ typescript twoslash +import { backfill, type BackfillContext } from "@fedify/backfill"; +import { Note } from "@fedify/vocab"; + +declare const context: BackfillContext; +declare const note: Note; +// ---cut-before--- +for await ( + const item of backfill(context, note, { + strategies: ["context-auto", "reply-tree"], + maxDepth: 4, + }) +) { + console.log(item.origin, item.depth, item.object); +} +~~~~ + + +Traversal controls +------------------ + +`maxItems` +: Limits the number of yielded objects. Skipped duplicates do not count. + +`maxRequests` +: Limits calls to `documentLoader`. Embedded objects and collections do not + count as requests. + +`maxDepth` +: Limits reply-tree traversal and defaults to 10. Immediate parents and + direct replies have depth 1; their next-level parents or replies have depth + 2, and so on. Context collection items have depth 0 and are not limited by + this option. + +`interval` +: Adds a delay between `documentLoader` requests. A callback receives the + zero-based request index. String durations require the global `Temporal` + API or a polyfill; `Temporal.DurationLike` objects work without the global + API. + +`signal` +: Cancels traversal. It is checked before each request and each yield, and + is also passed to `documentLoader`. + + +Caching and failures +-------------------- + +Dereferenced documents are cached in memory for one `backfill()` traversal. +Applications that need persistent or shared caching can implement it in the +provided `documentLoader`. + +Failed external dereferences are skipped so other conversation items can still +be discovered. Failed loads are not retained in the traversal cache, allowing +the same IRI to be retried if another traversal path reaches it. Aborting the +provided signal stops traversal instead of skipping the request. 
diff --git a/docs/package.json b/docs/package.json index 9d59d59a7..6d6548d20 100644 --- a/docs/package.json +++ b/docs/package.json @@ -5,6 +5,7 @@ "@deno/kv": "^0.8.4", "@fedify/amqp": "workspace:^", "@fedify/astro": "workspace:^", + "@fedify/backfill": "workspace:^", "@fedify/cfworkers": "workspace:^", "@fedify/debugger": "workspace:^", "@fedify/express": "workspace:^", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index df3689951..b694adbe4 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -199,6 +199,9 @@ importers: '@fedify/astro': specifier: workspace:^ version: link:../packages/astro + '@fedify/backfill': + specifier: workspace:^ + version: link:../packages/backfill '@fedify/cfworkers': specifier: workspace:^ version: link:../packages/cfworkers From 4e9348ad6639bef6a4a518f81ba7a5a3f2344663 Mon Sep 17 00:00:00 2001 From: Jiwon Kwon Date: Wed, 24 Jun 2026 13:48:34 +0900 Subject: [PATCH 3/3] Add pull request 816 to CHANGES.md --- CHANGES.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index e7c2b6fab..9dd38cb0c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -227,12 +227,13 @@ To be released. It supports FEP-f228 context collections containing post-like objects or `Create` activities, optional reply-tree traversal, ordered hybrid strategies, shared safety budgets, deduplication, and traversal-local - document caching. [[#275], [#779], [#801], [#807] by Jiwon Kwon] + document caching. [[#275], [#779], [#801], [#807], [#816] by Jiwon Kwon] [#275]: https://github.com/fedify-dev/fedify/issues/275 [#779]: https://github.com/fedify-dev/fedify/pull/779 [#801]: https://github.com/fedify-dev/fedify/pull/801 [#807]: https://github.com/fedify-dev/fedify/pull/807 +[#816]: https://github.com/fedify-dev/fedify/pull/816 ### @fedify/fixture