diff --git a/src/main/kotlin/zed/rainxch/githubstore/db/DatabaseFactory.kt b/src/main/kotlin/zed/rainxch/githubstore/db/DatabaseFactory.kt index 35e5b8f..8b93f1a 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/db/DatabaseFactory.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/db/DatabaseFactory.kt @@ -96,6 +96,7 @@ object DatabaseFactory { "V15__license_info.sql", "V16__oauth_ephemeral.sql", "V17__signing_fingerprint_host.sql", + "V18__pushed_at.sql", ) for (migration in migrations) { val rawSql = this::class.java.classLoader diff --git a/src/main/kotlin/zed/rainxch/githubstore/db/MeilisearchClient.kt b/src/main/kotlin/zed/rainxch/githubstore/db/MeilisearchClient.kt index a01b70d..a7de8a1 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/db/MeilisearchClient.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/db/MeilisearchClient.kt @@ -152,6 +152,8 @@ data class MeiliRepoHit( val has_installers_linux: Boolean = false, val trending_score: Double? = null, val popularity_score: Double? = null, + // R5/R13: last commit timestamp; piped from GitHub pushed_at. + val pushed_at: String? = null, // Must be populated on every addDocuments() call — Meili's POST /documents // *replaces* the doc, so omitting this field wipes the SignalAggregationWorker's // most recent score. Null here is "no signal yet," not "no longer ranked." 
diff --git a/src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt b/src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt index c816249..96b25f3 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt @@ -95,6 +95,7 @@ class RepoRepository { releasesUrl = "${this[Repos.htmlUrl]}/releases", updatedAt = this[Repos.updatedAtGh]?.toString(), createdAt = this[Repos.createdAtGh]?.toString(), + pushedAt = this[Repos.pushedAtGh]?.toString(), latestReleaseDate = releaseDateStr, latestReleaseTag = this[Repos.latestReleaseTag], releaseRecency = recencyDays, diff --git a/src/main/kotlin/zed/rainxch/githubstore/db/SearchRepository.kt b/src/main/kotlin/zed/rainxch/githubstore/db/SearchRepository.kt index 56e5802..d55a4a8 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/db/SearchRepository.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/db/SearchRepository.kt @@ -55,7 +55,7 @@ class SearchRepository { has_installers_android, has_installers_windows, has_installers_macos, has_installers_linux, trending_score, popularity_score, search_score, - updated_at_gh, created_at_gh + updated_at_gh, created_at_gh, pushed_at_gh FROM repos """.trimIndent() ) @@ -115,6 +115,7 @@ class SearchRepository { releasesUrl = "${rs.getString("html_url")}/releases", updatedAt = rs.getString("updated_at_gh"), createdAt = rs.getString("created_at_gh"), + pushedAt = rs.getString("pushed_at_gh"), latestReleaseDate = releaseDateStr, latestReleaseTag = rs.getString("latest_release_tag"), releaseRecency = recencyDays, diff --git a/src/main/kotlin/zed/rainxch/githubstore/db/Tables.kt b/src/main/kotlin/zed/rainxch/githubstore/db/Tables.kt index fad5c63..675690b 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/db/Tables.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/db/Tables.kt @@ -36,6 +36,9 @@ object Repos : Table("repos") { val searchScore = float("search_score").nullable() val createdAtGh = 
timestampWithTimeZone("created_at_gh").nullable() val updatedAtGh = timestampWithTimeZone("updated_at_gh").nullable() + // R5/R13: last default-branch commit (GitHub pushed_at), distinct from + // updatedAtGh (last metadata change). Used by client Heartbeat animation. + val pushedAtGh = timestampWithTimeZone("pushed_at_gh").nullable() val indexedAt = timestampWithTimeZone("indexed_at") override val primaryKey = PrimaryKey(id) diff --git a/src/main/kotlin/zed/rainxch/githubstore/ingest/GitHubSearchClient.kt b/src/main/kotlin/zed/rainxch/githubstore/ingest/GitHubSearchClient.kt index 9c05764..61769cc 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/ingest/GitHubSearchClient.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/ingest/GitHubSearchClient.kt @@ -539,6 +539,9 @@ class GitHubSearchClient( it[hasInstallersLinux] = platforms["linux"] ?: false it[downloadCount] = r.downloadCount it[searchScore] = scoreToWrite + it[pushedAtGh] = repo.pushedAt?.let { + try { OffsetDateTime.parse(it) } catch (_: Exception) { null } + } it[indexedAt] = OffsetDateTime.now() } scoredByRepoId[repo.id] = scoreToWrite.toDouble() @@ -576,6 +579,7 @@ class GitHubSearchClient( has_installers_windows = r.platformFlags["windows"] ?: false, has_installers_macos = r.platformFlags["macos"] ?: false, has_installers_linux = r.platformFlags["linux"] ?: false, + pushed_at = r.repo.pushedAt, // Meili's POST /documents replaces the whole doc. Omitting this // would wipe the SignalAggregationWorker's most recent score // on every passthrough/refresh until the next hourly cycle. @@ -622,6 +626,7 @@ class GitHubSearchClient( releasesUrl = "${repo.htmlUrl}/releases", updatedAt = repo.updatedAt, createdAt = repo.createdAt, + pushedAt = repo.pushedAt, latestReleaseDate = releaseDateStr, latestReleaseTag = release.tagName, releaseRecency = recencyDays, @@ -682,6 +687,8 @@ data class GitHubRepo( val disabled: Boolean = false, @SerialName("updated_at") val updatedAt: String? 
= null, @SerialName("created_at") val createdAt: String? = null, + // R5/R13: last default-branch commit, distinct from updated_at (metadata change). + @SerialName("pushed_at") val pushedAt: String? = null, ) @Serializable diff --git a/src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt b/src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt index b1ac4d3..812e447 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt @@ -49,6 +49,10 @@ data class RepoResponse( val releasesUrl: String?, val updatedAt: String?, val createdAt: String?, + // R5/R13: last commit timestamp (GitHub pushed_at = default-branch HEAD). + // Distinct from updatedAt (last metadata change). Null for Meili-served + // search results until meili_sync.py backfills the field. + val pushedAt: String? = null, val latestReleaseDate: String? = null, val latestReleaseTag: String? = null, val releaseRecency: Int? = null, diff --git a/src/main/kotlin/zed/rainxch/githubstore/routes/InternalRoutes.kt b/src/main/kotlin/zed/rainxch/githubstore/routes/InternalRoutes.kt index bc1d680..b180183 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/routes/InternalRoutes.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/routes/InternalRoutes.kt @@ -16,6 +16,7 @@ import kotlinx.coroutines.launch import kotlinx.serialization.Serializable import org.jetbrains.exposed.sql.SqlExpressionBuilder.eq import org.jetbrains.exposed.sql.SqlExpressionBuilder.isNull +import java.time.OffsetDateTime import org.jetbrains.exposed.sql.selectAll import org.jetbrains.exposed.sql.transactions.TransactionManager import org.jetbrains.exposed.sql.transactions.experimental.newSuspendedTransaction @@ -160,6 +161,58 @@ fun Route.internalRoutes( ) } + // One-shot backfill for pushed_at_gh (V18). Iterates every repo + // where pushed_at_gh IS NULL, re-fetches from GitHub, and writes + // the field. 
Terminates once all rows are filled:
+    //   - Ok / NoUsableRelease: real pushed_at from GitHub
+    //   - Archived / Gone: COALESCE(updated_at_gh, indexed_at) as proxy
+    //     so these rows are never reconsidered on subsequent runs
+    //   - TransientFailure: left NULL, re-tried on next invocation
+    // Shares the same backfillRunning gate as /backfill-stale so the
+    // two never run concurrently and don't race the rotation pool.
+    post("/backfill-pushed-at") {
+        if (!authorized(call, adminToken)) return@post respondNotFound(call)
+        val limit = call.request.queryParameters["limit"]?.toIntOrNull()?.coerceIn(1, 10_000) ?: 10_000
+        if (!backfillRunning.compareAndSet(false, true)) {
+            call.response.header(HttpHeaders.RetryAfter, "60")
+            return@post call.respond(
+                HttpStatusCode.Conflict,
+                BackfillResponse(scheduled = 0, started = false, message = "backfill_already_running"),
+            )
+        }
+        // Gate is held from here: release it if the query throws, else every later request gets the 409 above.
+        val candidates = try {
+            transaction {
+                Repos.selectAll().where { Repos.pushedAtGh.isNull() }
+                    .orderBy(Repos.id)
+                    .limit(limit)
+                    .map { it[Repos.id] to it[Repos.fullName] }
+            }
+        } catch (e: Throwable) {
+            backfillRunning.set(false)
+            throw e
+        }
+        if (candidates.isEmpty()) {
+            backfillRunning.set(false)
+            return@post call.respond(
+                HttpStatusCode.OK,
+                BackfillResponse(scheduled = 0, started = false, message = "no rows missing pushed_at"),
+            )
+        }
+        backfillScope.launch {
+            try {
+                runBackfill(searchClient, candidates)
+            } finally {
+                backfillRunning.set(false)
+            }
+        }
+        call.response.header(HttpHeaders.CacheControl, "no-store")
+        call.respond(
+            HttpStatusCode.Accepted,
+            BackfillResponse(scheduled = candidates.size, started = true),
+        )
+    }
+
     // Browser dashboard. Basic Auth required in prod so the browser prompts
     // for credentials on first visit; optional in dev for local inspection.
authenticate(ADMIN_BASIC_AUTH, optional = adminToken == null) {
@@ -232,8 +285,17 @@ private suspend fun runBackfill(
                 upsertMetadataOnly(result.repo)
                 metadataOnly++
             }
-            GitHubSearchClient.RefreshResult.Gone -> gone++
-            GitHubSearchClient.RefreshResult.Archived -> archived++
+            GitHubSearchClient.RefreshResult.Gone -> {
+                // Repo deleted on GitHub — stamp with existing data so it
+                // doesn't reappear in pushed_at_gh IS NULL queries forever.
+                markPushedAtFallback(fullName)
+                gone++
+            }
+            GitHubSearchClient.RefreshResult.Archived -> {
+                // Repo archived — same stamping rationale as Gone.
+                markPushedAtFallback(fullName)
+                archived++
+            }
             GitHubSearchClient.RefreshResult.TransientFailure -> failed++
         }
         delay(pacePerRepoMs)
@@ -259,7 +321,29 @@ private fun upsertMetadataOnly(repo: GitHubRepo) {
             it[licenseSpdxId] = repo.license?.spdxId
             it[licenseName] = repo.license?.name
             it[description] = repo.description
-            it[indexedAt] = java.time.OffsetDateTime.now()
+            it[pushedAtGh] = repo.pushedAt?.let { raw ->
+                try { OffsetDateTime.parse(raw) } catch (_: Exception) { null }
+            }
+            it[indexedAt] = OffsetDateTime.now()
+        }
+    }
+}
+
+// Fallback stamp for Gone/Archived repos, which yield no live pushed_at: sets
+// pushed_at_gh to COALESCE(updated_at_gh, indexed_at) for the row matching
+// fullName, only while it is still NULL, so later backfill runs skip the row.
+private fun markPushedAtFallback(fullName: String) {
+    transaction {
+        val conn = TransactionManager.current().connection.connection as java.sql.Connection
+        conn.prepareStatement(
+            """
+            UPDATE repos
+            SET pushed_at_gh = COALESCE(updated_at_gh, indexed_at)
+            WHERE full_name = ? AND pushed_at_gh IS NULL
+            """.trimIndent()
+        ).use { ps ->
+            ps.setString(1, fullName)
+            ps.executeUpdate()
         }
     }
 }
diff --git a/src/main/kotlin/zed/rainxch/githubstore/routes/RepoRoutes.kt b/src/main/kotlin/zed/rainxch/githubstore/routes/RepoRoutes.kt
index b2d754b..46ec49b 100644
--- a/src/main/kotlin/zed/rainxch/githubstore/routes/RepoRoutes.kt
+++ b/src/main/kotlin/zed/rainxch/githubstore/routes/RepoRoutes.kt
@@ -143,4 +143,5 @@ internal fun GitHubRepo.toMetadataOnlyResponse(): RepoResponse = RepoResponse(
     releasesUrl = "$htmlUrl/releases",
     updatedAt = updatedAt,
     createdAt = createdAt,
+    pushedAt = pushedAt,
 )
diff --git a/src/main/kotlin/zed/rainxch/githubstore/routes/SearchRoutes.kt b/src/main/kotlin/zed/rainxch/githubstore/routes/SearchRoutes.kt
index 3d84262..cf7f21b 100644
--- a/src/main/kotlin/zed/rainxch/githubstore/routes/SearchRoutes.kt
+++ b/src/main/kotlin/zed/rainxch/githubstore/routes/SearchRoutes.kt
@@ -292,6 +292,7 @@ private fun zed.rainxch.githubstore.db.MeiliRepoHit.toRepoResponse() = RepoRespo
     releasesUrl = "$html_url/releases",
     updatedAt = null,
     createdAt = null,
+    pushedAt = pushed_at,
     latestReleaseDate = latest_release_date,
     latestReleaseTag = latest_release_tag,
     downloadCount = download_count,
diff --git a/src/main/resources/db/migration/V18__pushed_at.sql b/src/main/resources/db/migration/V18__pushed_at.sql
new file mode 100644
index 0000000..cbc4221
--- /dev/null
+++ b/src/main/resources/db/migration/V18__pushed_at.sql
@@ -0,0 +1,4 @@
+-- R5/R13: Add nullable pushed_at_gh to store GitHub's pushed_at separately from
+-- updated_at_gh (last metadata change). NOTE(review): "default-branch HEAD" is
+-- unverified here — confirm. Clients use this for the Heartbeat animation period.
+ALTER TABLE repos ADD COLUMN pushed_at_gh TIMESTAMPTZ;