Skip to content
This repository was archived by the owner on Nov 26, 2022. It is now read-only.

Commit bad37ec

Browse files
committed
liveness check: use periodic check-ins. this is missing a migration
1 parent d138519 commit bad37ec

3 files changed

Lines changed: 35 additions & 10 deletions

File tree

model.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,3 +367,7 @@ class MastodonInstance(db.Model):
367367

368368
def bump(self, value=1):
369369
self.popularity = (self.popularity or 10) + value
370+
371+
class WorkerCheckin(db.Model, TimestampMixin):
372+
__tablename__ = 'worker_checkins'
373+
id = db.Column(db.Integer, primary_key=True)

routes/api.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
from app import app, db, imgproxy
22
from libforget.auth import require_auth_api, get_viewer
33
from flask import jsonify, redirect, make_response, request, Response
4-
from model import Account
4+
from model import Account, WorkerCheckin
55
import libforget.settings
66
import libforget.json
77
import random
8+
from datetime import datetime, timedelta
89

910
@app.route('/api/health_check') # deprecated 2021-03-12
1011
@app.route('/api/status_check')
@@ -19,14 +20,23 @@ def api_status_check():
1920
except Exception:
2021
return ('Redis bad', 500)
2122

22-
if db.session.execute(db.text("""
23-
SELECT 1 FROM accounts
24-
WHERE last_delete > now() - '60 minutes'::INTERVAL
25-
OR last_fetch > now() - '60 minutes'::INTERVAL
26-
OR last_refresh > now() - '60 minutes'::INTERVAL
27-
LIMIT 1;
28-
""")).fetchone() is None:
29-
return ('Celery stalled', 500)
23+
CHECKIN_EVENTS = 5
24+
CHECKIN_PERIOD = timedelta(minutes=10)
25+
# sorry about the obtuse variable names, this trips if the frequency is
26+
# lower than events/period
27+
checkin_count = db.session.query(WorkerCheckin)\
28+
.filter(WorkerCheckin.created_at > db.func.now() - CHECKIN_PERIOD)\
29+
.count()
30+
if checkin_count < events:
31+
return ('Celery slow, {} check-ins in {}'.format(
32+
checkin_count, CHECKIN_PERIOD
33+
), 500)
34+
35+
CHECKIN_LATENESS_THRESHOLD = timedelta(minutes=5)
36+
checkin = db.session.query(WorkerCheckin.created_at)\
37+
.order_by(db.desc(WorkerCheckin.created_at)).first()
38+
if checkin + CHECKIN_LATENESS_THRESHOLD < datetime.utcnow():
39+
return ('Celery late, last check-in was {}'.format(checkin), 500)
3040

3141
return 'OK'
3242

tasks.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from app import app as flaskapp
33
from app import db
44
from model import Session, Account, TwitterArchive, Post, OAuthToken,\
5-
MastodonInstance
5+
MastodonInstance, WorkerCheckin
66
import libforget.twitter
77
import libforget.mastodon
88
from datetime import timedelta, datetime, timezone
@@ -386,6 +386,11 @@ def periodic_cleanup():
386386
you have restored access and you can now re-enable Forget if you wish.
387387
""".format(service=account.service.capitalize())
388388

389+
# delete worker check-ins after 48 hours
390+
(WorkerCheckin.query.filter(
391+
WorkerCheckin.updated_at < (db.func.now() - timedelta(hours=48)))
392+
.delete(synchronize_session=False))
393+
389394
db.session.commit()
390395

391396

@@ -474,13 +479,19 @@ def update_mastodon_instances_popularity():
474479
})
475480
db.session.commit()
476481

482+
@app.task
483+
def report_in():
484+
db.session.add(WorkerCheckin())
485+
db.session.commit()
486+
477487

478488
app.add_periodic_task(40, queue_fetch_for_most_stale_accounts)
479489
app.add_periodic_task(9, queue_deletes)
480490
app.add_periodic_task(6, refresh_account_with_oldest_post)
481491
app.add_periodic_task(50, refresh_account_with_longest_time_since_refresh)
482492
app.add_periodic_task(300, periodic_cleanup)
483493
app.add_periodic_task(300, update_mastodon_instances_popularity)
494+
app.add_periodic_task(60, report_in)
484495

485496
if __name__ == '__main__':
486497
app.worker_main()

0 commit comments

Comments
 (0)