|
8 | 8 | Usage (local emulator): |
9 | 9 | DATASTORE_EMULATOR_HOST=localhost:8081 python ide/shell_cmds/buildUserHistory.py |
10 | 10 |
|
| 11 | +Usage (dry-run on first N users, no write): |
| 12 | + GOOGLE_CLOUD_PROJECT=glowscript-py38 python ide/shell_cmds/buildUserHistory.py --limit 500 |
| 13 | +
|
| 14 | +Usage (limited run + write): |
| 15 | + GOOGLE_CLOUD_PROJECT=glowscript-py38 python ide/shell_cmds/buildUserHistory.py --limit 500 --write |
| 16 | +
|
11 | 17 | Cost: ~$0.16 (reads ~315k User entities). Run once only. |
12 | 18 | """ |
13 | 19 |
|
| 20 | +import argparse |
14 | 21 | from collections import defaultdict |
15 | 22 | from datetime import datetime, timezone |
16 | 23 | import json |
17 | 24 | import os |
| 25 | +import sys |
18 | 26 |
|
19 | 27 | from google.cloud import ndb |
20 | 28 | from ide.models import User, Setting |
21 | 29 |
|
22 | 30 |
|
23 | | -def build_history(client): |
| 31 | +def build_history(client, limit=None, write=True): |
24 | 32 | monthly_new = defaultdict(int) |
25 | 33 | count = 0 |
26 | 34 | skipped = 0 |
27 | 35 |
|
28 | 36 | with client.context(): |
29 | | - for user in User.query(): # NDB iterates in batches — does not load all into memory |
| 37 | + query = User.query() |
| 38 | + if limit: |
| 39 | + query = query.fetch(limit) |
| 40 | + else: |
| 41 | + query = query.iter() # NDB iterates in batches — does not load all into memory |
| 42 | + |
| 43 | + for user in query: |
30 | 44 | count += 1 |
31 | 45 | if count % 10000 == 0: |
32 | 46 | print(f" {count} users processed...") |
@@ -56,29 +70,44 @@ def build_history(client): |
56 | 70 | 'points': points, |
57 | 71 | } |
58 | 72 |
|
| 73 | + print(f"Built {len(points)} monthly data points") |
| 74 | + if points: |
| 75 | + print(f"Latest: {points[-1]}") |
| 76 | + |
| 77 | + if not write: |
| 78 | + print("Dry-run: skipping Datastore write (pass --write to persist)") |
| 79 | + return |
| 80 | + |
59 | 81 | existing = ndb.Key('Setting', 'user_count_history').get() |
60 | 82 | if not existing: |
61 | 83 | existing = Setting(id='user_count_history') |
62 | 84 | existing.value = json.dumps(history) |
63 | 85 | try: |
64 | 86 | existing.put() |
65 | 87 | except Exception as e: |
66 | | - import sys |
67 | 88 | print(f"ERROR: Failed to store user history: {e}", file=sys.stderr) |
68 | 89 | sys.exit(1) |
69 | 90 |
|
70 | 91 | print(f"Stored {len(points)} monthly data points") |
71 | | - if points: |
72 | | - print(f"Latest: {points[-1]}") |
73 | 92 |
|
74 | 93 |
|
if __name__ == '__main__':
    # Command-line entry point: parse the flags, connect to Datastore (or the
    # emulator when DATASTORE_EMULATOR_HOST is set) and run build_history.
    parser = argparse.ArgumentParser()
    parser.add_argument('--limit', type=int, default=None,
                        help='Process only the first N users (implies dry-run unless --write is also set)')
    parser.add_argument('--write', action='store_true',
                        help='Write results to Datastore (required when using --limit; always writes without --limit)')
    args = parser.parse_args()

    # build_history treats a falsy limit as "no limit", so without this check
    # `--limit 0` would scan every user and write the result.  Refuse zero and
    # negative values up front instead of silently doing a full run.
    if args.limit is not None and args.limit <= 0:
        parser.error('--limit must be a positive integer')

    # A full run always writes; a limited run writes only when --write is given.
    write = True if args.limit is None else args.write

    project = os.environ.get('GOOGLE_CLOUD_PROJECT', 'glowscript')
    emulator = os.environ.get('DATASTORE_EMULATOR_HOST')
    client = ndb.Client(project=project)
    print(f"Connecting to {'emulator at ' + emulator if emulator else 'production Datastore'}...")
    if args.limit is not None:
        print(f"Limit: {args.limit} users ({'will write' if write else 'dry-run, no write'})")
    try:
        build_history(client, limit=args.limit, write=write)
    finally:
        # Always release the client, even if build_history exits or raises.
        client.close()
0 commit comments