Skip to content

Commit 0cb69c5

Browse files
sspickle and claude committed
feat: add buildUserHistory migration script
One-time script to read all User joinDates from Datastore, bucket by month, build cumulative history, and store as Setting entity 'user_count_history'. Cost ~$0.16 (315k reads). Run once only. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 9ec2240 commit 0cb69c5

1 file changed

Lines changed: 71 additions & 0 deletions

File tree

ide/shell_cmds/buildUserHistory.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""
2+
One-time migration: read all User joinDates, build monthly cumulative history,
3+
store in Setting entity 'user_count_history'.
4+
5+
Usage (production):
6+
GOOGLE_CLOUD_PROJECT=glowscript python ide/shell_cmds/buildUserHistory.py
7+
8+
Usage (local emulator):
9+
DATASTORE_EMULATOR_HOST=localhost:8081 python ide/shell_cmds/buildUserHistory.py
10+
11+
Cost: ~$0.16 (reads ~315k User entities). Run once only.
12+
"""
13+
14+
from collections import defaultdict
15+
from datetime import datetime, timezone
16+
import json
17+
import os
18+
19+
from google.cloud import ndb
20+
from ide.models import User, Setting
21+
22+
23+
def build_history(client):
    """Build the cumulative monthly user-count history and store it.

    Iterates over every ``User`` entity, buckets users by the ``YYYY-MM`` of
    their ``joinDate``, converts the buckets into a running total, and saves
    the result as a JSON string on the ``Setting`` entity whose id is
    ``'user_count_history'``. The entity is created when it does not exist
    and its ``value`` is overwritten when it does.

    Users with no ``joinDate`` are reported as skipped and are not included
    in the totals. A month in which nobody joined produces no data point.

    Args:
        client: Object providing ``context()`` (an ``ndb.Client`` when run as
            a script); all Datastore access happens inside that context.
    """
    monthly_new = defaultdict(int)
    count = 0
    skipped = 0

    # The query, the Setting lookup and the final put() all need an active
    # NDB context, so the whole read-compute-write sequence shares one block.
    with client.context():
        # NDB iterates in batches — does not load all into memory
        for count, user in enumerate(User.query(), start=1):
            if count % 10000 == 0:
                print(f" {count} users processed...")

            join_date = getattr(user, 'joinDate', None)
            if join_date:
                monthly_new[join_date.strftime('%Y-%m')] += 1
            else:
                skipped += 1

        print(f"Done: {count} users, {skipped} skipped (no joinDate)")

        points = _cumulative_points(monthly_new)
        history = {
            'updated': datetime.now(timezone.utc).strftime('%Y-%m-%d'),
            'points': points,
        }

        existing = ndb.Key('Setting', 'user_count_history').get()
        if not existing:
            existing = Setting(id='user_count_history')
        existing.value = json.dumps(history)
        existing.put()

    print(f"Stored {len(points)} monthly data points")
    if points:
        print(f"Latest: {points[-1]}")


def _cumulative_points(monthly_new):
    """Return running-total data points from a ``{'YYYY-MM': new_users}`` map.

    Months are processed in ascending order; ``'YYYY-MM'`` strings sort
    chronologically. Each point is ``{'month': ..., 'count': ...}`` where
    ``count`` is the total of all new users up to and including that month.
    """
    points = []
    cumulative = 0
    for month in sorted(monthly_new):
        cumulative += monthly_new[month]
        points.append({'month': month, 'count': cumulative})
    return points
64+
65+
66+
if __name__ == '__main__':
    # Script entry point: choose the Datastore target from the environment,
    # announce it, then run the migration.
    emulator = os.environ.get('DATASTORE_EMULATOR_HOST')
    project = os.environ.get('GOOGLE_CLOUD_PROJECT', 'glowscript')

    if emulator:
        # An emulator host is set: always use the dev project id.
        project_id = 'glowscript-dev'
        target = 'emulator at ' + emulator
    else:
        project_id = project
        target = 'production Datastore'

    client = ndb.Client(project=project_id)
    print(f"Connecting to {target}...")
    build_history(client)

0 commit comments

Comments
 (0)