|
| 1 | +import * as fs from "node:fs"; |
| 2 | +import * as path from "node:path"; |
| 3 | +import { layoutSemanticNodes } from "../src/utils/graphSemanticLayout"; |
| 4 | +import { getNobelReferences } from "../src/utils/nobelReferences"; |
| 5 | +import { getPublicDomainLabel } from "../src/utils/publicDomains"; |
| 6 | +import type { MapData } from "../src/utils/types"; |
| 7 | + |
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param values - Numbers to average.
 * @returns The sum divided by the element count. An empty list yields 0
 *   rather than NaN, because the divisor is clamped to at least 1.
 */
function mean(values: number[]): number {
  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  // Clamp the divisor so an empty input divides by 1 instead of 0.
  const divisor = values.length > 0 ? values.length : 1;
  return total / divisor;
}
| 11 | + |
/**
 * Computes the extent of a list of numbers.
 *
 * The values are folded one at a time instead of being spread into
 * `Math.min(...values)` / `Math.max(...values)`: spreading passes every element
 * as a separate call argument, which throws a `RangeError` once the array is
 * larger than the engine's argument limit.
 *
 * @param values - Numbers to measure.
 * @returns `min`, `max` and `span` (`max - min`). For an empty list this is
 *   `min = Infinity`, `max = -Infinity`, `span = -Infinity` — the same values
 *   `Math.min()` / `Math.max()` produce with no arguments. A NaN element makes
 *   all three fields NaN.
 */
function range(values: number[]): { min: number; max: number; span: number } {
  let min = Infinity;
  let max = -Infinity;
  for (const value of values) {
    // Math.min/Math.max (rather than < / >) keep NaN propagation intact.
    min = Math.min(min, value);
    max = Math.max(max, value);
  }
  return { min, max, span: max - min };
}
| 17 | + |
// ---------------------------------------------------------------------------
// Layout metrics report.
//
// Reads dist/map.json (resolved against the current working directory), runs
// layoutSemanticNodes over its nodes, and prints a single pretty-printed JSON
// document to stdout with four sections: canopy, informationCloud, bridges
// and nobel.
// ---------------------------------------------------------------------------

/** Rounds to three decimals via toFixed and converts back to a number. */
const round3 = (value: number): number => Number(value.toFixed(3));

const mapPath = path.resolve(process.cwd(), "dist/map.json");
// NOTE(review): the parsed JSON is cast to MapData without runtime validation.
const map = JSON.parse(fs.readFileSync(mapPath, "utf8")) as MapData;
const semanticNodes = layoutSemanticNodes(map.nodes);
const nobelReferences = getNobelReferences();

/** Laid-out nodes whose public domain label equals `label`. */
const nodesInDomain = (label: string) =>
  semanticNodes.filter((item) => getPublicDomainLabel(item.node.domain) === label);

/** Laid-out nodes whose node label equals `label` exactly. */
const nodesLabelled = (label: string) =>
  semanticNodes.filter((item) => item.node.label === label);

// Public domain labels in the order the gap/width metrics walk them.
// Presumably the intended left-to-right order of the canopy — TODO confirm.
const canopyOrder = [
  "Earth & Space",
  "Economics",
  "Engineering",
  "Chemistry",
  "Life Science",
  "Medicine",
  "Mathematics",
  "Physics",
];

// Per-domain centroid, extent, aspect ratio and bounding-box area.
const upperDomainClouds = canopyOrder.map((label) => {
  const members = nodesInDomain(label);
  const xs = members.map((member) => member.x);
  const ys = members.map((member) => member.y);
  const spreadX = range(xs).span;
  const spreadY = range(ys).span;
  return {
    label,
    meanX: mean(xs),
    meanY: mean(ys),
    spreadX,
    spreadY,
    // The denominator is floored at 1e-6 so a zero vertical spread does not divide by zero.
    aspectRatio: spreadX / Math.max(spreadY, 1e-6),
    area: spreadX * spreadY,
  };
});

// Centroid-only view of the same per-domain data.
const canopyMeans = upperDomainClouds.map(({ label, meanX, meanY }) => ({ label, meanX, meanY }));

// Differences between each domain centroid and the one before it in canopyOrder.
const canopyGaps: number[] = [];
const canopyYGaps: number[] = [];
for (let i = 1; i < canopyMeans.length; i += 1) {
  canopyGaps.push(canopyMeans[i].meanX - canopyMeans[i - 1].meanX);
  canopyYGaps.push(canopyMeans[i].meanY - canopyMeans[i - 1].meanY);
}

const firstCanopy = canopyMeans[0];
const lastCanopy = canopyMeans[canopyMeans.length - 1];
const canopyWidth = (lastCanopy?.meanX ?? 0) - (firstCanopy?.meanX ?? 0);
const canopyCenterX = ((firstCanopy?.meanX ?? 0) + (lastCanopy?.meanX ?? 0)) / 2;

/** meanY of the canopy entry at `index`, or 0 when that entry is missing. */
const canopyMeanYAt = (index: number): number => canopyMeans[index]?.meanY ?? 0;

// Index groups refer to positions in canopyOrder: the first two and last two
// entries ("outer"), the middle pair ("center"), and the two in between ("shoulder").
const outerCanopyMeanY = mean([
  canopyMeanYAt(0),
  canopyMeanYAt(1),
  canopyMeanYAt(6),
  canopyMeanYAt(7),
]);
const centerCanopyMeanY = mean([canopyMeanYAt(3), canopyMeanYAt(4)]);
const shoulderCanopyMeanY = mean([canopyMeanYAt(2), canopyMeanYAt(5)]);

// Two node groups selected by exact node label.
const pretraining = nodesLabelled("Minimize val_loss for GPT-2 124M on FineWeb (1h budget)");
const posttraining = nodesLabelled("Maximize math reasoning accuracy via GRPO on Qwen2.5-Math-1.5B");

const pretrainingXs = pretraining.map((item) => item.x);
const pretrainingYs = pretraining.map((item) => item.y);
const posttrainingXs = posttraining.map((item) => item.x);
const posttrainingYs = posttraining.map((item) => item.y);

const preX = range(pretrainingXs);
const preY = range(pretrainingYs);
const postX = range(posttrainingXs);
const postY = range(posttrainingYs);

const pretrainingMeanX = mean(pretrainingXs);
const pretrainingMeanY = mean(pretrainingYs);
const posttrainingMeanX = mean(posttrainingXs);
const posttrainingMeanY = mean(posttrainingYs);

const modelCompression = semanticNodes.filter((item) => item.node.domain === "model_compression");

// Bounding-box intersection (negative when the boxes are disjoint on that axis)
// and bounding-box union of the two groups.
const overlapX = Math.min(preX.max, postX.max) - Math.max(preX.min, postX.min);
const overlapY = Math.min(preY.max, postY.max) - Math.max(preY.min, postY.min);
const unionWidth = Math.max(preX.max, postX.max) - Math.min(preX.min, postX.min);
const unionHeight = Math.max(preY.max, postY.max) - Math.min(preY.min, postY.min);
const unionAspectRatio = unionWidth / Math.max(unionHeight, 1e-6);

// Midpoint of the two group centroids (each group weighted equally).
const informationMeanY = mean([pretrainingMeanY, posttrainingMeanY]);
const informationMeanX = mean([pretrainingMeanX, posttrainingMeanX]);

// References outside the box x in [0.31, 0.67], y in [0.28, 0.66].
const nobelPeripheryCount = nobelReferences.filter(
  ({ x, y }) => x < 0.31 || x > 0.67 || y < 0.28 || y > 0.66,
).length;
// References strictly inside the box x in (0.39, 0.61), y in (0.29, 0.6).
const nobelInnerCount = nobelReferences.filter(
  ({ x, y }) => x > 0.39 && x < 0.61 && y > 0.29 && y < 0.6,
).length;

const report = {
  canopy: {
    means: canopyMeans.map(({ label, meanX, meanY }) => ({
      label,
      meanX: round3(meanX),
      meanY: round3(meanY),
    })),
    clouds: upperDomainClouds.map((cloud) => ({
      label: cloud.label,
      meanX: round3(cloud.meanX),
      meanY: round3(cloud.meanY),
      spreadX: round3(cloud.spreadX),
      spreadY: round3(cloud.spreadY),
      aspectRatio: round3(cloud.aspectRatio),
      // Area is reported with four decimals, unlike every other metric.
      area: Number(cloud.area.toFixed(4)),
    })),
    gaps: canopyGaps.map((gap) => round3(gap)),
    yGaps: canopyYGaps.map((gap) => round3(gap)),
    maxGap: round3(Math.max(...canopyGaps)),
    minGap: round3(Math.min(...canopyGaps)),
    width: round3(canopyWidth),
    centerX: round3(canopyCenterX),
    maxAbsYGap: round3(Math.max(...canopyYGaps.map((gap) => Math.abs(gap)))),
    outerMeanY: round3(outerCanopyMeanY),
    centerMeanY: round3(centerCanopyMeanY),
    shoulderMeanY: round3(shoulderCanopyMeanY),
  },
  informationCloud: {
    meanX: round3(informationMeanX),
    meanY: round3(informationMeanY),
    centerOffsetFromCanopy: round3(Math.abs(informationMeanX - canopyCenterX)),
    gapToShoulder: round3(informationMeanY - shoulderCanopyMeanY),
    pretrainingMeanX: round3(pretrainingMeanX),
    pretrainingMeanY: round3(pretrainingMeanY),
    pretrainingSpreadX: round3(preX.span),
    pretrainingSpreadY: round3(preY.span),
    posttrainingMeanX: round3(posttrainingMeanX),
    posttrainingMeanY: round3(posttrainingMeanY),
    posttrainingSpreadX: round3(postX.span),
    posttrainingSpreadY: round3(postY.span),
    centroidGapX: round3(Math.abs(pretrainingMeanX - posttrainingMeanX)),
    centroidGapY: round3(Math.abs(pretrainingMeanY - posttrainingMeanY)),
    overlapX: round3(overlapX),
    overlapY: round3(overlapY),
    unionWidth: round3(unionWidth),
    unionHeight: round3(unionHeight),
    unionAspectRatio: round3(unionAspectRatio),
  },
  bridges: {
    modelCompressionMeanX: round3(mean(modelCompression.map((item) => item.x))),
    modelCompressionMeanY: round3(mean(modelCompression.map((item) => item.y))),
  },
  nobel: {
    count: nobelReferences.length,
    peripheryCount: nobelPeripheryCount,
    // The denominator is floored at 1 so an empty reference list gives 0, not NaN.
    peripheryRatio: round3(nobelPeripheryCount / Math.max(nobelReferences.length, 1)),
    innerCount: nobelInnerCount,
  },
};

console.log(JSON.stringify(report, null, 2));
0 commit comments