From f96d639cfd56bd5546b1d1c880a001fbe128c6d5 Mon Sep 17 00:00:00 2001
From: Shen Zhang
Date: Mon, 15 Dec 2025 18:14:41 -0600
Subject: [PATCH 1/2] made columns closer
---
src/pages/LeaderboardPage.css | 18 ++++++------------
1 file changed, 6 insertions(+), 12 deletions(-)
diff --git a/src/pages/LeaderboardPage.css b/src/pages/LeaderboardPage.css
index bac938a..26965a8 100644
--- a/src/pages/LeaderboardPage.css
+++ b/src/pages/LeaderboardPage.css
@@ -22,7 +22,7 @@
}
.leaderboard-full-bleed .leaderboard-container {
- width:90vw;
+ width:85vw;
margin: 0 auto;
}
@@ -339,8 +339,8 @@
--lb-grid:
minmax(0, 0.5fr) /* Select */
minmax(0, 0.7fr) /* Rank */
- minmax(0, 2.0fr) /* Name */
- minmax(0, 1.1fr) /* Affiliation */
+ minmax(0, 1.8fr) /* Name */
+ minmax(0, 0.7fr) /* Affiliation */
minmax(0, 0.9fr) /* Arena */
minmax(0, 0.9fr) /* Accuracy */
minmax(0, 0.9fr) /* Cost/1k */
@@ -1407,7 +1407,7 @@
.metrics-col {
display: flex;
- justify-content: center;
+ justify-content: flex-start; /* left-align contents; `left` is not a valid align-items value */
align-items: center;
}
.metric-value {
@@ -1781,13 +1781,6 @@
}
-/* Preserve left alignment for name and affiliation columns */
-.leaderboard-header .name-col,
-.leaderboard-header .affiliation-col,
-.leaderboard-row .name-col,
-.leaderboard-row .affiliation-col {
- text-align: left;
-}
/* Fix type badge and prevent flex growth */
.type-col {
@@ -1820,7 +1813,8 @@
/* put padding on cells (same for header and rows) */
.leaderboard-header > div,
.leaderboard-row > div {
- padding: 0.75rem 0.75rem !important;
+ padding: 0.75rem 0rem !important;
+ text-align: left;
overflow: hidden;
/* text-overflow: ellipsis; */
white-space: nowrap;
From 89e827d3f10115a61d7a637ae595cdfd5d8bdb5f Mon Sep 17 00:00:00 2001
From: Shen Zhang
Date: Mon, 22 Dec 2025 15:39:36 -0500
Subject: [PATCH 2/2] Added flip label files, updated router robustness data
for router comparison
---
src/data/flip_labels/flip_labels_azure.json | 1622 +++++++++++
.../flip_labels/flip_labels_graphrouter.json | 1682 ++++++++++++
.../flip_labels/flip_labels_notdiamond.json | 1626 +++++++++++
src/data/routerMetrics/category_scores.json | 2442 ++++++++---------
src/data/routerMetrics/leaderboard.json | 22 +-
src/pages/LeaderboardPage.tsx | 2 +-
6 files changed, 6163 insertions(+), 1233 deletions(-)
create mode 100644 src/data/flip_labels/flip_labels_azure.json
create mode 100644 src/data/flip_labels/flip_labels_graphrouter.json
create mode 100644 src/data/flip_labels/flip_labels_notdiamond.json
diff --git a/src/data/flip_labels/flip_labels_azure.json b/src/data/flip_labels/flip_labels_azure.json
new file mode 100644
index 0000000..abd8a07
--- /dev/null
+++ b/src/data/flip_labels/flip_labels_azure.json
@@ -0,0 +1,1622 @@
+[
+ {
+ "global index": "AIME_112",
+ "flip": 0
+ },
+ {
+ "global index": "AIME_58",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_12",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_123",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_16",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_182",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_230",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_293",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_349",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_378",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_443",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_496",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_631",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_646",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_659",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_676",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_685",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_689",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_702",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_713",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_98",
+ "flip": 0
+ },
+ {
+ "global index": "AsDiv_1165",
+ "flip": 1
+ },
+ {
+ "global index": "AsDiv_1347",
+ "flip": 0
+ },
+ {
+ "global index": "AsDiv_472",
+ "flip": 1
+ },
+ {
+ "global index": "AsDiv_733",
+ "flip": 1
+ },
+ {
+ "global index": "ChessInstruct_0",
+ "flip": 1
+ },
+ {
+ "global index": "ChessInstruct_107",
+ "flip": 1
+ },
+ {
+ "global index": "ChessInstruct_144",
+ "flip": 1
+ },
+ {
+ "global index": "ChessInstruct_42",
+ "flip": 1
+ },
+ {
+ "global index": "ChessInstruct_58",
+ "flip": 1
+ },
+ {
+ "global index": "ChessInstruct_71",
+ "flip": 1
+ },
+ {
+ "global index": "ChessInstruct_84",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_commonsense_28",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_commonsense_51",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_commonsense_6",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_commonsense_62",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_commonsense_70",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_commonsense_85",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_commonsense_90",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_deontology_0",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_deontology_2",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_deontology_31",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_deontology_32",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_deontology_56",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_justice_1",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_justice_45",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_justice_76",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_justice_84",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_virtue_14",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_virtue_26",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_virtue_30",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_virtue_48",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_virtue_51",
+ "flip": 1
+ },
+ {
+ "global index": "FinQA_149",
+ "flip": 0
+ },
+ {
+ "global index": "FinQA_208",
+ "flip": 1
+ },
+ {
+ "global index": "FinQA_56",
+ "flip": 0
+ },
+ {
+ "global index": "FinQA_60",
+ "flip": 0
+ },
+ {
+ "global index": "GSM8K_43",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_1002",
+ "flip": 1
+ },
+ {
+ "global index": "GeoBench_1094",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_1102",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_1113",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_124",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_1243",
+ "flip": 1
+ },
+ {
+ "global index": "GeoBench_30",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_502",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_526",
+ "flip": 1
+ },
+ {
+ "global index": "GeoBench_591",
+ "flip": 1
+ },
+ {
+ "global index": "GeoBench_766",
+ "flip": 1
+ },
+ {
+ "global index": "GeoBench_87",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_915",
+ "flip": 1
+ },
+ {
+ "global index": "GeoBench_944",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_968",
+ "flip": 1
+ },
+ {
+ "global index": "LiveCodeBench_105",
+ "flip": 1
+ },
+ {
+ "global index": "LiveCodeBench_114",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_118",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_131",
+ "flip": 1
+ },
+ {
+ "global index": "LiveCodeBench_136",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_181",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_237",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_271",
+ "flip": 1
+ },
+ {
+ "global index": "LiveCodeBench_350",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_386",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_405",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_43",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_431",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_437",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_476",
+ "flip": 1
+ },
+ {
+ "global index": "LiveCodeBench_485",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_49",
+ "flip": 1
+ },
+ {
+ "global index": "LiveCodeBench_491",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_499",
+ "flip": 1
+ },
+ {
+ "global index": "MATH_108",
+ "flip": 0
+ },
+ {
+ "global index": "MATH_442",
+ "flip": 1
+ },
+ {
+ "global index": "MATH_53",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_biology_2808",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_biology_2912",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_biology_2980",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_biology_2985",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_biology_3188",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_biology_3215",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_biology_3225",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_business_226",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_business_294",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_business_378",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_business_430",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_business_503",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_business_507",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_business_6",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_business_784",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_chemistry_3796",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_chemistry_3837",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_chemistry_3974",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_chemistry_4067",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_chemistry_4407",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9086",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9110",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9136",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9138",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9149",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9200",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9212",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9239",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9264",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9285",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9289",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9414",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9415",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9430",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9452",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9471",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9475",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_economics_5769",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_economics_5907",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_economics_5931",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_economics_5965",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_economics_6114",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_economics_6122",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_economics_6135",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_economics_6325",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_economics_6353",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10076",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_engineering_10125",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_engineering_10179",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10195",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_engineering_10199",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_engineering_10298",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_engineering_10342",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_engineering_10395",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_engineering_10428",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10432",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10473",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_engineering_10537",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_engineering_10701",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_engineering_10823",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_engineering_10864",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_health_4885",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_health_4973",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_health_5093",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_health_5144",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_health_5214",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_health_5215",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_health_5261",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_health_5473",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_health_5514",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_history_4486",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_history_4490",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4497",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_history_4509",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4517",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_history_4523",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4605",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_history_4629",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4638",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4717",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4749",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_history_4752",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_history_4774",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4810",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4833",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4836",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_history_4841",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_law_1007",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_law_1031",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_law_1386",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_law_1462",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_law_1484",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_law_1518",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_law_1818",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_law_806",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_law_899",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_math_6429",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_math_6526",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_math_6623",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_math_6848",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_math_7101",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_math_7249",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_math_7284",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_math_7451",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_math_7577",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_philosophy_9510",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_philosophy_9536",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_philosophy_9663",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_philosophy_9672",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_philosophy_9943",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_physics_7773",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_physics_7887",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_physics_7893",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_physics_8888",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_physics_9017",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_psychology_2005",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_psychology_2186",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_psychology_2329",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_psychology_2406",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_psychology_2420",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_psychology_2450",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_psychology_2457",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_psychology_2524",
+ "flip": 0
+ },
+ {
+ "global index": "MathQA_158",
+ "flip": 1
+ },
+ {
+ "global index": "MathQA_1742",
+ "flip": 0
+ },
+ {
+ "global index": "MathQA_202",
+ "flip": 0
+ },
+ {
+ "global index": "MathQA_2092",
+ "flip": 1
+ },
+ {
+ "global index": "MathQA_2102",
+ "flip": 0
+ },
+ {
+ "global index": "MathQA_2851",
+ "flip": 0
+ },
+ {
+ "global index": "MathQA_827",
+ "flip": 0
+ },
+ {
+ "global index": "MathQA_84",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_1005",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_1054",
+ "flip": 1
+ },
+ {
+ "global index": "MedMCQA_1298",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_1309",
+ "flip": 1
+ },
+ {
+ "global index": "MedMCQA_1362",
+ "flip": 1
+ },
+ {
+ "global index": "MedMCQA_145",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_2010",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_2366",
+ "flip": 1
+ },
+ {
+ "global index": "MedMCQA_2581",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_511",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_59",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_643",
+ "flip": 1
+ },
+ {
+ "global index": "MedMCQA_853",
+ "flip": 0
+ },
+ {
+ "global index": "MusicTheoryBench_126",
+ "flip": 1
+ },
+ {
+ "global index": "MusicTheoryBench_14",
+ "flip": 0
+ },
+ {
+ "global index": "MusicTheoryBench_147",
+ "flip": 0
+ },
+ {
+ "global index": "MusicTheoryBench_152",
+ "flip": 0
+ },
+ {
+ "global index": "MusicTheoryBench_188",
+ "flip": 1
+ },
+ {
+ "global index": "MusicTheoryBench_189",
+ "flip": 0
+ },
+ {
+ "global index": "MusicTheoryBench_240",
+ "flip": 0
+ },
+ {
+ "global index": "MusicTheoryBench_33",
+ "flip": 0
+ },
+ {
+ "global index": "MusicTheoryBench_337",
+ "flip": 0
+ },
+ {
+ "global index": "MusicTheoryBench_340",
+ "flip": 0
+ },
+ {
+ "global index": "MusicTheoryBench_70",
+ "flip": 1
+ },
+ {
+ "global index": "NarrativeQA_131",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_1683",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_2474",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_2820",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_3282",
+ "flip": 1
+ },
+ {
+ "global index": "NarrativeQA_4102",
+ "flip": 1
+ },
+ {
+ "global index": "NarrativeQA_4128",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_4347",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_4540",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_5022",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_5259",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_533",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_5894",
+ "flip": 1
+ },
+ {
+ "global index": "NarrativeQA_6829",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_7678",
+ "flip": 1
+ },
+ {
+ "global index": "NarrativeQA_7964",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_8215",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_8598",
+ "flip": 1
+ },
+ {
+ "global index": "NarrativeQA_927",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Animals_2545",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_Animals_262",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Art_1429",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Celebrities_1078",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_Celebrities_1456",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Celebrities_3577",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Celebrities_3968",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_1178",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_1807",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_1957",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_2181",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_3404",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_3727",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_4019",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Geography_2099",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Geography_2346",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Geography_3880",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Geography_792",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_History_1162",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_History_2026",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_History_3712",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_History_3902",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Science & Nature_1175",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Science & Nature_1560",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Science & Nature_3716",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Science & Nature_476",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Sports_2289",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Vehicles_1173",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Vehicles_1419",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_Vehicles_2519",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_Vehicles_3234",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_0",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_154",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_18",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_238",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_250",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_337",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_362",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_437",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_510",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_520",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_575",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_582",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_588",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_610",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_63",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_643",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_687",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_722",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_73",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_755",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_8",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_81",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_854",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_905",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Fine Arts_1212",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Fine Arts_1702",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Fine Arts_828",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Fine Arts_865",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Geography_1023",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Geography_1555",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Geography_304",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_History_1084",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_History_1154",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_History_433",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_History_473",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_History_926",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Literature_1045",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Literature_1073",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Literature_1239",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Literature_1326",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Literature_1727",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Literature_1843",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Literature_386",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Literature_408",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Literature_475",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Literature_833",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Philosophy_1270",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Philosophy_499",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Philosophy_91",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Science_1360",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Science_1473",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Science_308",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Science_619",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Social Science_1847",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Social Science_2",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Social Science_77",
+ "flip": 1
+ },
+ {
+ "global index": "SocialiQA_13810",
+ "flip": 1
+ },
+ {
+ "global index": "SocialiQA_22095",
+ "flip": 0
+ },
+ {
+ "global index": "SocialiQA_26846",
+ "flip": 0
+ },
+ {
+ "global index": "SocialiQA_7839",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-CausalReasoning_4526",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-ClozeTest_12894",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-ClozeTest_17965",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-ClozeTest_18766",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Entailment_19410",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Entailment_19567",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Entailment_522",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Entailment_767",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-QA_1408",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-QA_3137",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-QA_4046",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-QA_4102",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-QA_4160",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-RC_7725",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-RC_7738",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-RC_8531",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Wic_19695",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-Wic_19738",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Wic_20079",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Wic_20189",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-Wic_20253",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-Wsc_20368",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Wsc_20370",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-cs-en_156",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-cs-en_246",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-cs-en_568",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-de-en_46",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-de-en_715",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-de-en_883",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-fi-en_222",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-fi-en_610",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-gu-en_116",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-gu-en_123",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-gu-en_191",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-gu-en_491",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-gu-en_968",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-kk-en_528",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-kk-en_826",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-lt-en_135",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-lt-en_269",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-lt-en_636",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-ru-en_222",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-zh-en_218",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-zh-en_252",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-zh-en_59",
+ "flip": 1
+ }
+]
diff --git a/src/data/flip_labels/flip_labels_graphrouter.json b/src/data/flip_labels/flip_labels_graphrouter.json
new file mode 100644
index 0000000..8da2159
--- /dev/null
+++ b/src/data/flip_labels/flip_labels_graphrouter.json
@@ -0,0 +1,1682 @@
+[
+ {
+ "global_index": "AIME_112",
+ "flip": 0
+ },
+ {
+ "global_index": "AIME_58",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_12",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_123",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_16",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_182",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_230",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_293",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_349",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_378",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_443",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_496",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_631",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_646",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_659",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_676",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_685",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_689",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_702",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_713",
+ "flip": 0
+ },
+ {
+ "global_index": "ArcMMLU_98",
+ "flip": 0
+ },
+ {
+ "global_index": "AsDiv_1165",
+ "flip": 0
+ },
+ {
+ "global_index": "AsDiv_1347",
+ "flip": 0
+ },
+ {
+ "global_index": "AsDiv_472",
+ "flip": 1
+ },
+ {
+ "global_index": "AsDiv_733",
+ "flip": 0
+ },
+ {
+ "global_index": "ChessInstruct_0",
+ "flip": 0
+ },
+ {
+ "global_index": "ChessInstruct_107",
+ "flip": 0
+ },
+ {
+ "global_index": "ChessInstruct_144",
+ "flip": 0
+ },
+ {
+ "global_index": "ChessInstruct_42",
+ "flip": 0
+ },
+ {
+ "global_index": "ChessInstruct_58",
+ "flip": 0
+ },
+ {
+ "global_index": "ChessInstruct_71",
+ "flip": 0
+ },
+ {
+ "global_index": "ChessInstruct_84",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_commonsense_28",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_commonsense_51",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_commonsense_6",
+ "flip": 1
+ },
+ {
+ "global_index": "Ethics_commonsense_62",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_commonsense_70",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_commonsense_85",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_commonsense_90",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_deontology_0",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_deontology_2",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_deontology_31",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_deontology_32",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_deontology_56",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_justice_1",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_justice_45",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_justice_76",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_justice_84",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_virtue_14",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_virtue_26",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_virtue_30",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_virtue_48",
+ "flip": 0
+ },
+ {
+ "global_index": "Ethics_virtue_51",
+ "flip": 0
+ },
+ {
+ "global_index": "FinQA_149",
+ "flip": 0
+ },
+ {
+ "global_index": "FinQA_208",
+ "flip": 0
+ },
+ {
+ "global_index": "FinQA_56",
+ "flip": 0
+ },
+ {
+ "global_index": "FinQA_60",
+ "flip": 0
+ },
+ {
+ "global_index": "GSM8K_43",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_1002",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_1094",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_1102",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_1113",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_124",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_1243",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_30",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_502",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_526",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_591",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_766",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_87",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_915",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_944",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoBench_968",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoGraphyData_100k_27",
+ "flip": 0
+ },
+ {
+ "global_index": "GeoGraphyData_100k_42",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_105",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_114",
+ "flip": 1
+ },
+ {
+ "global_index": "LiveCodeBench_118",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_131",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_136",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_181",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_237",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_271",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_350",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_386",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_405",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_43",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_431",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_437",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_476",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_485",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_49",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_491",
+ "flip": 0
+ },
+ {
+ "global_index": "LiveCodeBench_499",
+ "flip": 1
+ },
+ {
+ "global_index": "MATH_108",
+ "flip": 1
+ },
+ {
+ "global_index": "MATH_442",
+ "flip": 0
+ },
+ {
+ "global_index": "MATH_53",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_biology_2808",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_biology_2912",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_biology_2980",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_biology_2985",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_biology_3188",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_biology_3215",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_biology_3225",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_business_226",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_business_294",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_business_378",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_business_430",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_business_503",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_business_507",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_business_6",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_business_784",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_chemistry_3796",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_chemistry_3837",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_chemistry_3974",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_chemistry_4067",
+ "flip": 1
+ },
+ {
+ "global_index": "MMLUPro_chemistry_4407",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9086",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9110",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9136",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9138",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9149",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9200",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9212",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9239",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9264",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9285",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9289",
+ "flip": 1
+ },
+ {
+ "global_index": "MMLUPro_computer science_9414",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9415",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9430",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9452",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9471",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_computer science_9475",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_economics_5769",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_economics_5907",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_economics_5931",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_economics_5965",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_economics_6114",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_economics_6122",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_economics_6135",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_economics_6325",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_economics_6353",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_engineering_10076",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_engineering_10125",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_engineering_10179",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_engineering_10195",
+ "flip": 1
+ },
+ {
+ "global_index": "MMLUPro_engineering_10199",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_engineering_10298",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_engineering_10342",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_engineering_10395",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_engineering_10428",
+ "flip": 1
+ },
+ {
+ "global_index": "MMLUPro_engineering_10432",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_engineering_10473",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_engineering_10537",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_engineering_10701",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_engineering_10823",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_engineering_10864",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_health_4885",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_health_4973",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_health_5093",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_health_5144",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_health_5214",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_health_5215",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_health_5261",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_health_5473",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_health_5514",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4486",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4490",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4497",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4509",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4517",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4523",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4605",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4629",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4638",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4717",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4749",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4752",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4774",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4810",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4833",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4836",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_history_4841",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_law_1007",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_law_1031",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_law_1386",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_law_1462",
+ "flip": 1
+ },
+ {
+ "global_index": "MMLUPro_law_1484",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_law_1518",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_law_1818",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_law_806",
+ "flip": 1
+ },
+ {
+ "global_index": "MMLUPro_law_899",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_math_6429",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_math_6526",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_math_6623",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_math_6848",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_math_7101",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_math_7249",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_math_7284",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_math_7451",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_math_7577",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_philosophy_9510",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_philosophy_9536",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_philosophy_9663",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_philosophy_9672",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_philosophy_9943",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_physics_7773",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_physics_7887",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_physics_7893",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_physics_8888",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_physics_9017",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_psychology_2005",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_psychology_2186",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_psychology_2329",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_psychology_2406",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_psychology_2420",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_psychology_2450",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_psychology_2457",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLUPro_psychology_2524",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLU_formal_logic_121",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLU_formal_logic_16",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLU_formal_logic_32",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLU_formal_logic_63",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLU_formal_logic_7",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLU_formal_logic_70",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLU_formal_logic_85",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLU_management_4",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLU_management_41",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLU_management_77",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLU_management_91",
+ "flip": 0
+ },
+ {
+ "global_index": "MMLU_management_93",
+ "flip": 0
+ },
+ {
+ "global_index": "MathQA_158",
+ "flip": 0
+ },
+ {
+ "global_index": "MathQA_1742",
+ "flip": 0
+ },
+ {
+ "global_index": "MathQA_202",
+ "flip": 0
+ },
+ {
+ "global_index": "MathQA_2092",
+ "flip": 0
+ },
+ {
+ "global_index": "MathQA_2102",
+ "flip": 0
+ },
+ {
+ "global_index": "MathQA_2851",
+ "flip": 0
+ },
+ {
+ "global_index": "MathQA_827",
+ "flip": 1
+ },
+ {
+ "global_index": "MathQA_84",
+ "flip": 1
+ },
+ {
+ "global_index": "MedMCQA_1005",
+ "flip": 0
+ },
+ {
+ "global_index": "MedMCQA_1054",
+ "flip": 0
+ },
+ {
+ "global_index": "MedMCQA_1298",
+ "flip": 0
+ },
+ {
+ "global_index": "MedMCQA_1309",
+ "flip": 0
+ },
+ {
+ "global_index": "MedMCQA_1362",
+ "flip": 0
+ },
+ {
+ "global_index": "MedMCQA_145",
+ "flip": 0
+ },
+ {
+ "global_index": "MedMCQA_2010",
+ "flip": 0
+ },
+ {
+ "global_index": "MedMCQA_2323",
+ "flip": 0
+ },
+ {
+ "global_index": "MedMCQA_2366",
+ "flip": 0
+ },
+ {
+ "global_index": "MedMCQA_2581",
+ "flip": 0
+ },
+ {
+ "global_index": "MedMCQA_511",
+ "flip": 0
+ },
+ {
+ "global_index": "MedMCQA_59",
+ "flip": 0
+ },
+ {
+ "global_index": "MedMCQA_643",
+ "flip": 0
+ },
+ {
+ "global_index": "MedMCQA_853",
+ "flip": 0
+ },
+ {
+ "global_index": "MusicTheoryBench_126",
+ "flip": 0
+ },
+ {
+ "global_index": "MusicTheoryBench_14",
+ "flip": 0
+ },
+ {
+ "global_index": "MusicTheoryBench_147",
+ "flip": 0
+ },
+ {
+ "global_index": "MusicTheoryBench_152",
+ "flip": 0
+ },
+ {
+ "global_index": "MusicTheoryBench_188",
+ "flip": 0
+ },
+ {
+ "global_index": "MusicTheoryBench_189",
+ "flip": 0
+ },
+ {
+ "global_index": "MusicTheoryBench_240",
+ "flip": 0
+ },
+ {
+ "global_index": "MusicTheoryBench_33",
+ "flip": 0
+ },
+ {
+ "global_index": "MusicTheoryBench_337",
+ "flip": 0
+ },
+ {
+ "global_index": "MusicTheoryBench_340",
+ "flip": 0
+ },
+ {
+ "global_index": "MusicTheoryBench_70",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_131",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_1683",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_2474",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_2820",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_3282",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_4102",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_4128",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_4347",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_4540",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_5022",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_5259",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_533",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_5894",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_6829",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_7678",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_7964",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_8215",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_8598",
+ "flip": 0
+ },
+ {
+ "global_index": "NarrativeQA_927",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Animals_2545",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Animals_262",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Art_1429",
+ "flip": 1
+ },
+ {
+ "global_index": "OpenTDB_Celebrities_1078",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Celebrities_1456",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Celebrities_3577",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Celebrities_3968",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_General Knowledge_1178",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_General Knowledge_1807",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_General Knowledge_1957",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_General Knowledge_2181",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_General Knowledge_3404",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_General Knowledge_3727",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_General Knowledge_4019",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Geography_2099",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Geography_2346",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Geography_3880",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Geography_792",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_History_1162",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_History_2026",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_History_3712",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_History_3902",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Science & Nature_1175",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Science & Nature_1560",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Science & Nature_3716",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Science & Nature_476",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Sports_2289",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Vehicles_1173",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Vehicles_1419",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Vehicles_2519",
+ "flip": 0
+ },
+ {
+ "global_index": "OpenTDB_Vehicles_3234",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_0",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_154",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_18",
+ "flip": 1
+ },
+ {
+ "global_index": "PubMedQA_238",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_250",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_337",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_362",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_437",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_510",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_520",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_575",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_582",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_588",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_610",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_63",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_643",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_687",
+ "flip": 1
+ },
+ {
+ "global_index": "PubMedQA_722",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_73",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_755",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_8",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_81",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_854",
+ "flip": 0
+ },
+ {
+ "global_index": "PubMedQA_905",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Fine Arts_1212",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Fine Arts_1702",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Fine Arts_828",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Fine Arts_865",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Geography_1023",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Geography_1555",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Geography_304",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_History_1084",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_History_1154",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_History_433",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_History_473",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_History_926",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Literature_1045",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Literature_1073",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Literature_1239",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Literature_1326",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Literature_1727",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Literature_1843",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Literature_386",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Literature_408",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Literature_475",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Literature_833",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Philosophy_1270",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Philosophy_499",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Philosophy_91",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Science_1360",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Science_1473",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Science_308",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Science_619",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Social Science_1847",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Social Science_2",
+ "flip": 0
+ },
+ {
+ "global_index": "QANTA_Social Science_77",
+ "flip": 0
+ },
+ {
+ "global_index": "SocialiQA_13810",
+ "flip": 0
+ },
+ {
+ "global_index": "SocialiQA_22095",
+ "flip": 0
+ },
+ {
+ "global_index": "SocialiQA_26846",
+ "flip": 0
+ },
+ {
+ "global_index": "SocialiQA_7839",
+ "flip": 1
+ },
+ {
+ "global_index": "SuperGLUE-CausalReasoning_4526",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-ClozeTest_12894",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-ClozeTest_17965",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-ClozeTest_18766",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-Entailment_19410",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-Entailment_19567",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-Entailment_522",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-Entailment_767",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-QA_1408",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-QA_3137",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-QA_4046",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-QA_4102",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-QA_4160",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-RC_7725",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-RC_7738",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-RC_8531",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-Wic_19695",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-Wic_19738",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-Wic_20079",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-Wic_20189",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-Wic_20253",
+ "flip": 1
+ },
+ {
+ "global_index": "SuperGLUE-Wsc_20368",
+ "flip": 0
+ },
+ {
+ "global_index": "SuperGLUE-Wsc_20370",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-cs-en_156",
+ "flip": 1
+ },
+ {
+ "global_index": "WMT19-cs-en_246",
+ "flip": 1
+ },
+ {
+ "global_index": "WMT19-cs-en_568",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-de-en_46",
+ "flip": 1
+ },
+ {
+ "global_index": "WMT19-de-en_715",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-de-en_883",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-fi-en_222",
+ "flip": 1
+ },
+ {
+ "global_index": "WMT19-fi-en_610",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-gu-en_116",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-gu-en_123",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-gu-en_191",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-gu-en_491",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-gu-en_968",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-kk-en_528",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-kk-en_826",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-lt-en_135",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-lt-en_269",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-lt-en_636",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-ru-en_222",
+ "flip": 1
+ },
+ {
+ "global_index": "WMT19-zh-en_218",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-zh-en_252",
+ "flip": 0
+ },
+ {
+ "global_index": "WMT19-zh-en_59",
+ "flip": 1
+ }
+]
diff --git a/src/data/flip_labels/flip_labels_notdiamond.json b/src/data/flip_labels/flip_labels_notdiamond.json
new file mode 100644
index 0000000..3403e0d
--- /dev/null
+++ b/src/data/flip_labels/flip_labels_notdiamond.json
@@ -0,0 +1,1626 @@
+[
+ {
+ "global index": "AIME_112",
+ "flip": 0
+ },
+ {
+ "global index": "AIME_58",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_12",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_123",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_16",
+ "flip": 1
+ },
+ {
+ "global index": "ArcMMLU_182",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_230",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_293",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_349",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_378",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_443",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_496",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_631",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_646",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_659",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_676",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_685",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_689",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_702",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_713",
+ "flip": 0
+ },
+ {
+ "global index": "ArcMMLU_98",
+ "flip": 0
+ },
+ {
+ "global index": "AsDiv_1165",
+ "flip": 0
+ },
+ {
+ "global index": "AsDiv_1347",
+ "flip": 0
+ },
+ {
+ "global index": "AsDiv_472",
+ "flip": 0
+ },
+ {
+ "global index": "AsDiv_733",
+ "flip": 0
+ },
+ {
+ "global index": "ChessInstruct_0",
+ "flip": 1
+ },
+ {
+ "global index": "ChessInstruct_107",
+ "flip": 0
+ },
+ {
+ "global index": "ChessInstruct_144",
+ "flip": 0
+ },
+ {
+ "global index": "ChessInstruct_42",
+ "flip": 1
+ },
+ {
+ "global index": "ChessInstruct_58",
+ "flip": 1
+ },
+ {
+ "global index": "ChessInstruct_71",
+ "flip": 1
+ },
+ {
+ "global index": "ChessInstruct_84",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_commonsense_28",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_commonsense_51",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_commonsense_6",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_commonsense_62",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_commonsense_70",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_commonsense_85",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_commonsense_90",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_deontology_0",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_deontology_2",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_deontology_31",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_deontology_32",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_deontology_56",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_justice_1",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_justice_45",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_justice_76",
+ "flip": 1
+ },
+ {
+ "global index": "Ethics_justice_84",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_virtue_14",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_virtue_26",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_virtue_30",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_virtue_48",
+ "flip": 0
+ },
+ {
+ "global index": "Ethics_virtue_51",
+ "flip": 1
+ },
+ {
+ "global index": "FinQA_149",
+ "flip": 0
+ },
+ {
+ "global index": "FinQA_208",
+ "flip": 0
+ },
+ {
+ "global index": "FinQA_56",
+ "flip": 0
+ },
+ {
+ "global index": "FinQA_60",
+ "flip": 0
+ },
+ {
+ "global index": "GSM8K_43",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_1002",
+ "flip": 1
+ },
+ {
+ "global index": "GeoBench_1094",
+ "flip": 1
+ },
+ {
+ "global index": "GeoBench_1102",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_1113",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_124",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_1243",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_30",
+ "flip": 1
+ },
+ {
+ "global index": "GeoBench_502",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_526",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_591",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_766",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_87",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_915",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_944",
+ "flip": 0
+ },
+ {
+ "global index": "GeoBench_968",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_105",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_114",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_118",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_131",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_136",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_181",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_237",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_271",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_350",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_386",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_405",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_43",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_431",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_437",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_476",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_485",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_49",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_491",
+ "flip": 0
+ },
+ {
+ "global index": "LiveCodeBench_499",
+ "flip": 0
+ },
+ {
+ "global index": "MATH_108",
+ "flip": 0
+ },
+ {
+ "global index": "MATH_442",
+ "flip": 0
+ },
+ {
+ "global index": "MATH_53",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_biology_2808",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_biology_2912",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_biology_2980",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_biology_2985",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_biology_3188",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_biology_3215",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_biology_3225",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_business_226",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_business_294",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_business_378",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_business_430",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_business_503",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_business_507",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_business_6",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_business_784",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_chemistry_3796",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_chemistry_3837",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_chemistry_3974",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_chemistry_4067",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_chemistry_4407",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9086",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9110",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9136",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9138",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9149",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9200",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9212",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9239",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9264",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9285",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9289",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9414",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9415",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9430",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_computer science_9452",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9471",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_computer science_9475",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_economics_5769",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_economics_5907",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_economics_5931",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_economics_5965",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_economics_6114",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_economics_6122",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_economics_6135",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_economics_6325",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_economics_6353",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_engineering_10076",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10125",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10179",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10195",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10199",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10298",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10342",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10395",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10428",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10432",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10473",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10537",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10701",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_engineering_10823",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_engineering_10864",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_health_4885",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_health_4973",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_health_5093",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_health_5144",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_health_5214",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_health_5215",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_health_5261",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_health_5473",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_health_5514",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4486",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4490",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4497",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4509",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_history_4517",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4523",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4605",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4629",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4638",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4717",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4749",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4752",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4774",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4810",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4833",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_history_4836",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_history_4841",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_law_1007",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_law_1031",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_law_1386",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_law_1462",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_law_1484",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_law_1518",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_law_1818",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_law_806",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_law_899",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_math_6429",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_math_6526",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_math_6623",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_math_6848",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_math_7101",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_math_7249",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_math_7284",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_math_7451",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_math_7577",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_philosophy_9510",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_philosophy_9536",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_philosophy_9663",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_philosophy_9672",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_philosophy_9943",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_physics_7773",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_physics_7887",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_physics_7893",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_physics_8888",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_physics_9017",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_psychology_2005",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_psychology_2186",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_psychology_2329",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_psychology_2406",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_psychology_2420",
+ "flip": 0
+ },
+ {
+ "global index": "MMLUPro_psychology_2450",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_psychology_2457",
+ "flip": 1
+ },
+ {
+ "global index": "MMLUPro_psychology_2524",
+ "flip": 0
+ },
+ {
+ "global index": "MathQA_158",
+ "flip": 1
+ },
+ {
+ "global index": "MathQA_1742",
+ "flip": 1
+ },
+ {
+ "global index": "MathQA_202",
+ "flip": 0
+ },
+ {
+ "global index": "MathQA_2092",
+ "flip": 0
+ },
+ {
+ "global index": "MathQA_2102",
+ "flip": 0
+ },
+ {
+ "global index": "MathQA_2851",
+ "flip": 0
+ },
+ {
+ "global index": "MathQA_827",
+ "flip": 0
+ },
+ {
+ "global index": "MathQA_84",
+ "flip": 1
+ },
+ {
+ "global index": "MedMCQA_1005",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_1054",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_1298",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_1309",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_1362",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_145",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_2010",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_2323",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_2366",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_2581",
+ "flip": 1
+ },
+ {
+ "global index": "MedMCQA_511",
+ "flip": 1
+ },
+ {
+ "global index": "MedMCQA_59",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_643",
+ "flip": 0
+ },
+ {
+ "global index": "MedMCQA_853",
+ "flip": 0
+ },
+ {
+ "global index": "MusicTheoryBench_126",
+ "flip": 1
+ },
+ {
+ "global index": "MusicTheoryBench_14",
+ "flip": 0
+ },
+ {
+ "global index": "MusicTheoryBench_147",
+ "flip": 1
+ },
+ {
+ "global index": "MusicTheoryBench_152",
+ "flip": 1
+ },
+ {
+ "global index": "MusicTheoryBench_188",
+ "flip": 1
+ },
+ {
+ "global index": "MusicTheoryBench_189",
+ "flip": 1
+ },
+ {
+ "global index": "MusicTheoryBench_240",
+ "flip": 0
+ },
+ {
+ "global index": "MusicTheoryBench_33",
+ "flip": 1
+ },
+ {
+ "global index": "MusicTheoryBench_337",
+ "flip": 1
+ },
+ {
+ "global index": "MusicTheoryBench_340",
+ "flip": 0
+ },
+ {
+ "global index": "MusicTheoryBench_70",
+ "flip": 1
+ },
+ {
+ "global index": "NarrativeQA_131",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_1683",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_2474",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_2820",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_3282",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_4102",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_4128",
+ "flip": 1
+ },
+ {
+ "global index": "NarrativeQA_4347",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_4540",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_5022",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_5259",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_533",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_5894",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_6829",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_7678",
+ "flip": 1
+ },
+ {
+ "global index": "NarrativeQA_7964",
+ "flip": 1
+ },
+ {
+ "global index": "NarrativeQA_8215",
+ "flip": 0
+ },
+ {
+ "global index": "NarrativeQA_8598",
+ "flip": 1
+ },
+ {
+ "global index": "NarrativeQA_927",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_Animals_2545",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Animals_262",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_Art_1429",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Celebrities_1078",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Celebrities_1456",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Celebrities_3577",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_Celebrities_3968",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_1178",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_1807",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_1957",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_2181",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_3404",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_3727",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_General Knowledge_4019",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_Geography_2099",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Geography_2346",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Geography_3880",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Geography_792",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_History_1162",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_History_2026",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_History_3712",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_History_3902",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Science & Nature_1175",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Science & Nature_1560",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Science & Nature_3716",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Science & Nature_476",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_Sports_2289",
+ "flip": 1
+ },
+ {
+ "global index": "OpenTDB_Vehicles_1173",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Vehicles_1419",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Vehicles_2519",
+ "flip": 0
+ },
+ {
+ "global index": "OpenTDB_Vehicles_3234",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_0",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_154",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_18",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_238",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_250",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_337",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_362",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_437",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_510",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_520",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_575",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_582",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_588",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_610",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_63",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_643",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_687",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_722",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_73",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_755",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_8",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_81",
+ "flip": 0
+ },
+ {
+ "global index": "PubMedQA_854",
+ "flip": 1
+ },
+ {
+ "global index": "PubMedQA_905",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Fine Arts_1212",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Fine Arts_1702",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Fine Arts_828",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Fine Arts_865",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Geography_1023",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Geography_1555",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Geography_304",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_History_1084",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_History_1154",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_History_433",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_History_473",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_History_926",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Literature_1045",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Literature_1073",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Literature_1239",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Literature_1326",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Literature_1727",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Literature_1843",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Literature_386",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Literature_408",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Literature_475",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Literature_833",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Philosophy_1270",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Philosophy_499",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Philosophy_91",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Science_1360",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Science_1473",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Science_308",
+ "flip": 0
+ },
+ {
+ "global index": "QANTA_Science_619",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Social Science_1847",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Social Science_2",
+ "flip": 1
+ },
+ {
+ "global index": "QANTA_Social Science_77",
+ "flip": 1
+ },
+ {
+ "global index": "SocialiQA_13810",
+ "flip": 1
+ },
+ {
+ "global index": "SocialiQA_22095",
+ "flip": 0
+ },
+ {
+ "global index": "SocialiQA_26846",
+ "flip": 0
+ },
+ {
+ "global index": "SocialiQA_7839",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-CausalReasoning_4526",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-ClozeTest_12894",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-ClozeTest_17965",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-ClozeTest_18766",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Entailment_19410",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Entailment_19567",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-Entailment_522",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Entailment_767",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-QA_1408",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-QA_3137",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-QA_4046",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-QA_4102",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-QA_4160",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-RC_7725",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-RC_7738",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-RC_8531",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-Wic_19695",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Wic_19738",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Wic_20079",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Wic_20189",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Wic_20253",
+ "flip": 1
+ },
+ {
+ "global index": "SuperGLUE-Wsc_20368",
+ "flip": 0
+ },
+ {
+ "global index": "SuperGLUE-Wsc_20370",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-cs-en_156",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-cs-en_246",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-cs-en_568",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-de-en_46",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-de-en_715",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-de-en_883",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-fi-en_222",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-fi-en_610",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-gu-en_116",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-gu-en_123",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-gu-en_191",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-gu-en_491",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-gu-en_968",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-kk-en_528",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-kk-en_826",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-lt-en_135",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-lt-en_269",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-lt-en_636",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-ru-en_222",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-zh-en_218",
+ "flip": 0
+ },
+ {
+ "global index": "WMT19-zh-en_252",
+ "flip": 1
+ },
+ {
+ "global index": "WMT19-zh-en_59",
+ "flip": 0
+ }
+]
diff --git a/src/data/routerMetrics/category_scores.json b/src/data/routerMetrics/category_scores.json
index d73d6b7..a0e1d63 100644
--- a/src/data/routerMetrics/category_scores.json
+++ b/src/data/routerMetrics/category_scores.json
@@ -4,22 +4,22 @@
"easy": {
"accuracy": 95.2,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 92.9
},
"medium": {
"accuracy": 58.6,
"cost": 0.0021,
- "robustness": 0
+ "robustness": 90.3
},
"hard": {
"accuracy": 21.2,
"cost": 0.0026,
- "robustness": 0
+ "robustness": 80.7
},
"all": {
"accuracy": 67.2,
"cost": 0.0021,
- "robustness": 0
+ "robustness": 89.0
}
},
"categories": {
@@ -28,22 +28,22 @@
"easy": {
"accuracy": 97.0,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 80.8
},
"medium": {
"accuracy": 62.8,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 90.5
},
"hard": {
"accuracy": 7.3,
"cost": 0.003,
- "robustness": 0
+ "robustness": 93.3
},
"all": {
"accuracy": 70.0,
"cost": 0.0021,
- "robustness": 0
+ "robustness": 87.1
}
},
"subcategories": {
@@ -52,22 +52,22 @@
"easy": {
"accuracy": 96.0,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 76.9
},
"medium": {
"accuracy": 67.1,
"cost": 0.0015,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 5.6,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 81.6,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 84.2
}
}
},
@@ -76,22 +76,22 @@
"easy": {
"accuracy": 97.6,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 84.6
},
"medium": {
"accuracy": 61.8,
"cost": 0.0026,
- "robustness": 0
+ "robustness": 88.2
},
"hard": {
"accuracy": 7.7,
"cost": 0.0032,
- "robustness": 0
+ "robustness": 92.3
},
"all": {
"accuracy": 65.4,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 88.4
}
}
}
@@ -102,22 +102,22 @@
"easy": {
"accuracy": 92.9,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 56.9,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 7.4,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 72.1,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
}
},
"subcategories": {
@@ -126,22 +126,22 @@
"easy": {
"accuracy": 90.3,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 38.8,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 70.7,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -150,22 +150,22 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 70.0,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 8.9,
"cost": 0.002,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 56.6,
"cost": 0.002,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -174,7 +174,7 @@
"easy": {
"accuracy": 95.9,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 75.0,
@@ -184,12 +184,12 @@
"hard": {
"accuracy": 5.6,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 81.8,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -198,12 +198,12 @@
"easy": {
"accuracy": 90.8,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 60.5,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 20.0,
@@ -213,7 +213,7 @@
"all": {
"accuracy": 76.9,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -224,22 +224,22 @@
"easy": {
"accuracy": 94.6,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 83.3
},
"medium": {
"accuracy": 60.9,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 8.1,
"cost": 0.0027,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 64.0,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 91.9
}
},
"subcategories": {
@@ -248,22 +248,22 @@
"easy": {
"accuracy": 96.2,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 75.0
},
"medium": {
"accuracy": 57.7,
"cost": 0.0023,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 6.2,
"cost": 0.0033,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 64.6,
"cost": 0.0023,
- "robustness": 0
+ "robustness": 85.7
}
}
},
@@ -272,22 +272,22 @@
"easy": {
"accuracy": 89.2,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 69.8,
"cost": 0.0028,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 13.3,
"cost": 0.0025,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 67.1,
"cost": 0.0026,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -301,17 +301,17 @@
"medium": {
"accuracy": 72.2,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 5.9,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 32.7,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -320,7 +320,7 @@
"easy": {
"accuracy": 95.7,
"cost": 0.0015,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 33.3,
@@ -330,12 +330,12 @@
"hard": {
"accuracy": 20.0,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 80.3,
"cost": 0.0015,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -346,22 +346,22 @@
"easy": {
"accuracy": 80.4,
"cost": 0.001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 36.5,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 77.8
},
"hard": {
"accuracy": 52.9,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 56.0
},
"all": {
"accuracy": 57.2,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 71.1
}
},
"subcategories": {
@@ -370,22 +370,22 @@
"easy": {
"accuracy": 80.4,
"cost": 0.001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 36.5,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 77.8
},
"hard": {
"accuracy": 52.9,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 56.0
},
"all": {
"accuracy": 57.2,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 71.1
}
}
}
@@ -396,22 +396,22 @@
"easy": {
"accuracy": 97.4,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 84.6
},
"medium": {
"accuracy": 56.2,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 85.7
},
"hard": {
"accuracy": 3.6,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 55.6
},
"all": {
"accuracy": 71.7,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 81.2
}
},
"subcategories": {
@@ -420,22 +420,22 @@
"easy": {
"accuracy": 96.2,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 72.7
},
"medium": {
"accuracy": 51.6,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 81.8
},
"hard": {
"accuracy": 2.2,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 60.0
},
"all": {
"accuracy": 63.7,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 74.1
}
}
},
@@ -444,22 +444,22 @@
"easy": {
"accuracy": 97.3,
"cost": 0.0015,
- "robustness": 0
+ "robustness": 87.5
},
"medium": {
"accuracy": 54.2,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 7.7,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 86.1,
"cost": 0.0015,
- "robustness": 0
+ "robustness": 86.7
}
}
},
@@ -468,7 +468,7 @@
"easy": {
"accuracy": 99.1,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 58.8,
@@ -483,7 +483,7 @@
"all": {
"accuracy": 88.1,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -492,12 +492,12 @@
"easy": {
"accuracy": 97.7,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 70.6,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 0.0,
@@ -507,7 +507,7 @@
"all": {
"accuracy": 73.3,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 60.0
}
}
},
@@ -516,12 +516,12 @@
"easy": {
"accuracy": 97.7,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 64.3,
"cost": 0.0021,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 18.2,
@@ -531,7 +531,7 @@
"all": {
"accuracy": 74.2,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -540,7 +540,7 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0015,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 100.0,
@@ -555,7 +555,7 @@
"all": {
"accuracy": 100.0,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -564,7 +564,7 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0015,
- "robustness": 0
+ "robustness": 80.0
},
"medium": {
"accuracy": 64.0,
@@ -574,12 +574,12 @@
"hard": {
"accuracy": 3.4,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 42.1,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 75.0
}
}
}
@@ -590,22 +590,22 @@
"easy": {
"accuracy": 95.7,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 58.1,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 85.7
},
"hard": {
"accuracy": 6.4,
"cost": 0.0023,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 74.4,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 95.8
}
},
"subcategories": {
@@ -614,22 +614,22 @@
"easy": {
"accuracy": 94.4,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 62.3,
"cost": 0.0026,
- "robustness": 0
+ "robustness": 84.6
},
"hard": {
"accuracy": 4.8,
"cost": 0.0026,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 67.2,
"cost": 0.0023,
- "robustness": 0
+ "robustness": 89.5
}
}
},
@@ -638,22 +638,22 @@
"easy": {
"accuracy": 95.8,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 55.4,
"cost": 0.002,
- "robustness": 0
+ "robustness": 85.7
},
"hard": {
"accuracy": 7.1,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 75.8,
"cost": 0.0023,
- "robustness": 0
+ "robustness": 97.9
}
}
},
@@ -662,12 +662,12 @@
"easy": {
"accuracy": 98.3,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 54.5,
"cost": 0.0011,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -677,7 +677,7 @@
"all": {
"accuracy": 89.0,
"cost": 0.0012,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -688,22 +688,22 @@
"easy": {
"accuracy": 93.3,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 52.9,
"cost": 0.002,
- "robustness": 0
+ "robustness": 90.9
},
"hard": {
"accuracy": 10.9,
"cost": 0.0026,
- "robustness": 0
+ "robustness": 80.0
},
"all": {
"accuracy": 61.3,
"cost": 0.002,
- "robustness": 0
+ "robustness": 91.7
}
},
"subcategories": {
@@ -712,22 +712,22 @@
"easy": {
"accuracy": 90.7,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 70.0,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 75.0
},
"hard": {
"accuracy": 13.6,
"cost": 0.0037,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 67.3,
"cost": 0.0025,
- "robustness": 0
+ "robustness": 75.0
}
}
},
@@ -736,22 +736,22 @@
"easy": {
"accuracy": 92.9,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 28.4,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 8.0,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 52.9,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -760,22 +760,22 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 82.1,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 8.9,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 62.3,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -786,22 +786,22 @@
"easy": {
"accuracy": 99.1,
"cost": 0.002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 77.9,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 30.8,
"cost": 0.0034,
- "robustness": 0
+ "robustness": 83.3
},
"all": {
"accuracy": 46.4,
"cost": 0.0031,
- "robustness": 0
+ "robustness": 86.2
}
},
"subcategories": {
@@ -810,22 +810,22 @@
"easy": {
"accuracy": 99.1,
"cost": 0.002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 77.9,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 30.8,
"cost": 0.0034,
- "robustness": 0
+ "robustness": 83.3
},
"all": {
"accuracy": 46.4,
"cost": 0.0031,
- "robustness": 0
+ "robustness": 86.2
}
}
}
@@ -836,22 +836,22 @@
"easy": {
"accuracy": 97.3,
"cost": 0.0021,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 74.6,
"cost": 0.0023,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 11.4,
"cost": 0.0023,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 73.3,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 100.0
}
},
"subcategories": {
@@ -860,12 +860,12 @@
"easy": {
"accuracy": 97.2,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 72.4,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 41.7,
@@ -875,7 +875,7 @@
"all": {
"accuracy": 88.0,
"cost": 0.0015,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -884,22 +884,22 @@
"easy": {
"accuracy": 97.8,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 74.8,
"cost": 0.0025,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 8.8,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 67.1,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -908,7 +908,7 @@
"easy": {
"accuracy": 95.2,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 77.8,
@@ -923,7 +923,7 @@
"all": {
"accuracy": 90.4,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -936,22 +936,22 @@
"easy": {
"accuracy": 89.8,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 94.9
},
"medium": {
"accuracy": 38.2,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 94.7
},
"hard": {
"accuracy": 13.9,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 92.7
},
"all": {
"accuracy": 57.0,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 94.3
}
},
"categories": {
@@ -960,22 +960,22 @@
"easy": {
"accuracy": 93.7,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 92.3
},
"medium": {
"accuracy": 41.6,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 2.4,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 93.3
},
"all": {
"accuracy": 60.5,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 95.2
}
},
"subcategories": {
@@ -984,22 +984,22 @@
"easy": {
"accuracy": 96.7,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 32.9,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 74.2,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -1008,22 +1008,22 @@
"easy": {
"accuracy": 91.6,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 84.6
},
"medium": {
"accuracy": 43.5,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 2.9,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 92.3
},
"all": {
"accuracy": 55.1,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 93.0
}
}
}
@@ -1034,22 +1034,22 @@
"easy": {
"accuracy": 90.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 96.3
},
"medium": {
"accuracy": 35.5,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 2.5,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 63.4,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 97.7
}
},
"subcategories": {
@@ -1058,22 +1058,22 @@
"easy": {
"accuracy": 91.4,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 92.9
},
"medium": {
"accuracy": 29.4,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 68.6,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 95.2
}
}
},
@@ -1082,22 +1082,22 @@
"easy": {
"accuracy": 89.2,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 40.0,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 2.2,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 41.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -1106,7 +1106,7 @@
"easy": {
"accuracy": 87.6,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 41.7,
@@ -1116,12 +1116,12 @@
"hard": {
"accuracy": 5.6,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 67.9,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -1130,12 +1130,12 @@
"easy": {
"accuracy": 90.8,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 36.8,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -1145,7 +1145,7 @@
"all": {
"accuracy": 67.6,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -1156,22 +1156,22 @@
"easy": {
"accuracy": 85.9,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 32.8,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 83.3
},
"hard": {
"accuracy": 2.7,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 85.7
},
"all": {
"accuracy": 49.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 91.9
}
},
"subcategories": {
@@ -1180,22 +1180,22 @@
"easy": {
"accuracy": 86.9,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 34.4,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 2.5,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 50.9,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -1204,22 +1204,22 @@
"easy": {
"accuracy": 84.6,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 30.2,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 3.3,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 47.5,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 77.8
}
}
},
@@ -1233,17 +1233,17 @@
"medium": {
"accuracy": 38.9,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 2.9,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 20.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -1252,7 +1252,7 @@
"easy": {
"accuracy": 83.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 11.1,
@@ -1262,12 +1262,12 @@
"hard": {
"accuracy": 0.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 65.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 75.0
}
}
}
@@ -1278,22 +1278,22 @@
"easy": {
"accuracy": 67.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 90.9
},
"medium": {
"accuracy": 25.1,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 28.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 76.0
},
"all": {
"accuracy": 39.3,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 84.4
}
},
"subcategories": {
@@ -1302,22 +1302,22 @@
"easy": {
"accuracy": 67.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 90.9
},
"medium": {
"accuracy": 25.1,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 28.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 76.0
},
"all": {
"accuracy": 39.3,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 84.4
}
}
}
@@ -1328,22 +1328,22 @@
"easy": {
"accuracy": 86.4,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 92.3
},
"medium": {
"accuracy": 36.6,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 90.5
},
"hard": {
"accuracy": 3.1,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 59.5,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 92.8
}
},
"subcategories": {
@@ -1352,22 +1352,22 @@
"easy": {
"accuracy": 67.9,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 72.7
},
"medium": {
"accuracy": 33.2,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 90.9
},
"hard": {
"accuracy": 2.2,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 43.8,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 85.2
}
}
},
@@ -1376,22 +1376,22 @@
"easy": {
"accuracy": 99.2,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 64.4,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 89.1,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -1400,7 +1400,7 @@
"easy": {
"accuracy": 94.9,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 17.6,
@@ -1415,7 +1415,7 @@
"all": {
"accuracy": 80.4,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -1424,12 +1424,12 @@
"easy": {
"accuracy": 76.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 26.5,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 7.7,
@@ -1439,7 +1439,7 @@
"all": {
"accuracy": 47.8,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 80.0
}
}
},
@@ -1448,12 +1448,12 @@
"easy": {
"accuracy": 95.5,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 33.3,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 9.1,
@@ -1463,7 +1463,7 @@
"all": {
"accuracy": 58.8,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -1472,7 +1472,7 @@
"easy": {
"accuracy": 95.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 40.0,
@@ -1487,7 +1487,7 @@
"all": {
"accuracy": 84.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -1496,7 +1496,7 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 36.0,
@@ -1506,12 +1506,12 @@
"hard": {
"accuracy": 1.7,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 35.1,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -1522,22 +1522,22 @@
"easy": {
"accuracy": 92.8,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 95.3
},
"medium": {
"accuracy": 40.5,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 90.5
},
"hard": {
"accuracy": 1.9,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 66.9,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 94.4
}
},
"subcategories": {
@@ -1546,22 +1546,22 @@
"easy": {
"accuracy": 86.5,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 37.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 84.6
},
"hard": {
"accuracy": 2.4,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 51.6,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 89.5
}
}
},
@@ -1570,22 +1570,22 @@
"easy": {
"accuracy": 94.2,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 94.1
},
"medium": {
"accuracy": 41.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 1.8,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 70.9,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 95.7
}
}
},
@@ -1594,12 +1594,12 @@
"easy": {
"accuracy": 91.7,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 54.5,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -1609,7 +1609,7 @@
"all": {
"accuracy": 83.6,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -1620,22 +1620,22 @@
"easy": {
"accuracy": 94.0,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 87.5
},
"medium": {
"accuracy": 35.7,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.7,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 53.1,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 95.8
}
},
"subcategories": {
@@ -1644,22 +1644,22 @@
"easy": {
"accuracy": 92.1,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 39.0,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 1.5,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 55.2,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -1668,22 +1668,22 @@
"easy": {
"accuracy": 94.9,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 29.3,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 53.3,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -1692,22 +1692,22 @@
"easy": {
"accuracy": 96.6,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 0.0
},
"medium": {
"accuracy": 46.2,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 48.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 80.0
}
}
}
@@ -1718,22 +1718,22 @@
"easy": {
"accuracy": 98.2,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 51.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 29.3,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 42.5,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 100.0
}
},
"subcategories": {
@@ -1742,22 +1742,22 @@
"easy": {
"accuracy": 98.2,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 51.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 29.3,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 42.5,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -1768,22 +1768,22 @@
"easy": {
"accuracy": 92.5,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 49.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 3.4,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 62.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
}
},
"subcategories": {
@@ -1792,12 +1792,12 @@
"easy": {
"accuracy": 85.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 58.6,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -1807,7 +1807,7 @@
"all": {
"accuracy": 73.3,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -1816,22 +1816,22 @@
"easy": {
"accuracy": 95.5,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 48.9,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 3.7,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 57.4,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -1840,7 +1840,7 @@
"easy": {
"accuracy": 95.2,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 33.3,
@@ -1855,7 +1855,7 @@
"all": {
"accuracy": 82.7,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -1868,22 +1868,22 @@
"easy": {
"accuracy": 92.9,
"cost": 0.0054,
- "robustness": 0
+ "robustness": 56.9
},
"medium": {
"accuracy": 64.0,
"cost": 0.008,
- "robustness": 0
+ "robustness": 45.9
},
"hard": {
"accuracy": 24.4,
"cost": 0.0186,
- "robustness": 0
+ "robustness": 64.2
},
"all": {
"accuracy": 68.0,
"cost": 0.0093,
- "robustness": 0
+ "robustness": 55.9
}
},
"categories": {
@@ -1892,22 +1892,22 @@
"easy": {
"accuracy": 92.4,
"cost": 0.005,
- "robustness": 0
+ "robustness": 51.3
},
"medium": {
"accuracy": 61.6,
"cost": 0.0062,
- "robustness": 0
+ "robustness": 30.0
},
"hard": {
"accuracy": 17.4,
"cost": 0.0061,
- "robustness": 0
+ "robustness": 57.1
},
"all": {
"accuracy": 74.2,
"cost": 0.0055,
- "robustness": 0
+ "robustness": 45.5
}
},
"subcategories": {
@@ -1916,22 +1916,22 @@
"easy": {
"accuracy": 91.7,
"cost": 0.005,
- "robustness": 0
+ "robustness": 47.1
},
"medium": {
"accuracy": 52.5,
"cost": 0.0053,
- "robustness": 0
+ "robustness": 71.4
},
"hard": {
"accuracy": 12.4,
"cost": 0.0051,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 73.1,
"cost": 0.0051,
- "robustness": 0
+ "robustness": 53.2
}
}
},
@@ -1940,22 +1940,22 @@
"easy": {
"accuracy": 96.0,
"cost": 0.0048,
- "robustness": 0
+ "robustness": 80.0
},
"medium": {
"accuracy": 74.9,
"cost": 0.0075,
- "robustness": 0
+ "robustness": 7.7
},
"hard": {
"accuracy": 31.0,
"cost": 0.0089,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 77.3,
"cost": 0.0066,
- "robustness": 0
+ "robustness": 26.3
}
}
}
@@ -1966,22 +1966,22 @@
"easy": {
"accuracy": 89.8,
"cost": 0.0044,
- "robustness": 0
+ "robustness": 60.9
},
"medium": {
"accuracy": 54.3,
"cost": 0.005,
- "robustness": 0
+ "robustness": 71.4
},
"hard": {
"accuracy": 14.5,
"cost": 0.0049,
- "robustness": 0
+ "robustness": 28.6
},
"all": {
"accuracy": 69.8,
"cost": 0.0047,
- "robustness": 0
+ "robustness": 56.8
}
},
"subcategories": {
@@ -1990,22 +1990,22 @@
"easy": {
"accuracy": 89.2,
"cost": 0.0047,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 65.0,
"cost": 0.0047,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 20.0,
"cost": 0.0046,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 55.7,
"cost": 0.0047,
- "robustness": 0
+ "robustness": 37.5
}
}
},
@@ -2014,7 +2014,7 @@
"easy": {
"accuracy": 95.9,
"cost": 0.0047,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 81.2,
@@ -2024,12 +2024,12 @@
"hard": {
"accuracy": 5.6,
"cost": 0.0054,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 83.4,
"cost": 0.0048,
- "robustness": 0
+ "robustness": 62.5
}
}
},
@@ -2038,22 +2038,22 @@
"easy": {
"accuracy": 85.9,
"cost": 0.0042,
- "robustness": 0
+ "robustness": 64.3
},
"medium": {
"accuracy": 34.1,
"cost": 0.0052,
- "robustness": 0
+ "robustness": 80.0
},
"hard": {
"accuracy": 7.7,
"cost": 0.005,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 66.8,
"cost": 0.0046,
- "robustness": 0
+ "robustness": 61.9
}
}
}
@@ -2064,22 +2064,22 @@
"easy": {
"accuracy": 89.9,
"cost": 0.005,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 60.9,
"cost": 0.0063,
- "robustness": 0
+ "robustness": 25.0
},
"hard": {
"accuracy": 7.4,
"cost": 0.0074,
- "robustness": 0
+ "robustness": 57.1
},
"all": {
"accuracy": 61.9,
"cost": 0.006,
- "robustness": 0
+ "robustness": 43.2
}
},
"subcategories": {
@@ -2093,17 +2093,17 @@
"medium": {
"accuracy": 66.7,
"cost": 0.0045,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 5.9,
"cost": 0.0034,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 30.9,
"cost": 0.0037,
- "robustness": 0
+ "robustness": 0.0
}
}
},
@@ -2112,22 +2112,22 @@
"easy": {
"accuracy": 94.0,
"cost": 0.0053,
- "robustness": 0
+ "robustness": 58.3
},
"medium": {
"accuracy": 68.7,
"cost": 0.0068,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 7.5,
"cost": 0.01,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 68.1,
"cost": 0.0068,
- "robustness": 0
+ "robustness": 57.1
}
}
},
@@ -2136,7 +2136,7 @@
"easy": {
"accuracy": 91.5,
"cost": 0.0041,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 22.2,
@@ -2146,12 +2146,12 @@
"hard": {
"accuracy": 0.0,
"cost": 0.0037,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 73.8,
"cost": 0.0042,
- "robustness": 0
+ "robustness": 75.0
}
}
},
@@ -2160,22 +2160,22 @@
"easy": {
"accuracy": 76.9,
"cost": 0.0049,
- "robustness": 0
+ "robustness": 0.0
},
"medium": {
"accuracy": 44.4,
"cost": 0.0058,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 10.0,
"cost": 0.0054,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 51.3,
"cost": 0.0054,
- "robustness": 0
+ "robustness": 11.1
}
}
}
@@ -2186,22 +2186,22 @@
"easy": {
"accuracy": 97.3,
"cost": 0.0049,
- "robustness": 0
+ "robustness": 51.3
},
"medium": {
"accuracy": 74.1,
"cost": 0.0074,
- "robustness": 0
+ "robustness": 47.6
},
"hard": {
"accuracy": 12.9,
"cost": 0.011,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 78.4,
"cost": 0.0065,
- "robustness": 0
+ "robustness": 52.2
}
},
"subcategories": {
@@ -2210,7 +2210,7 @@
"easy": {
"accuracy": 93.3,
"cost": 0.0034,
- "robustness": 0
+ "robustness": 80.0
},
"medium": {
"accuracy": 60.0,
@@ -2220,12 +2220,12 @@
"hard": {
"accuracy": 6.8,
"cost": 0.005,
- "robustness": 0
+ "robustness": 33.3
},
"all": {
"accuracy": 41.2,
"cost": 0.0046,
- "robustness": 0
+ "robustness": 62.5
}
}
},
@@ -2234,22 +2234,22 @@
"easy": {
"accuracy": 98.5,
"cost": 0.0052,
- "robustness": 0
+ "robustness": 36.4
},
"medium": {
"accuracy": 78.4,
"cost": 0.009,
- "robustness": 0
+ "robustness": 54.5
},
"hard": {
"accuracy": 13.5,
"cost": 0.018,
- "robustness": 0
+ "robustness": 80.0
},
"all": {
"accuracy": 77.5,
"cost": 0.0087,
- "robustness": 0
+ "robustness": 51.9
}
}
},
@@ -2258,7 +2258,7 @@
"easy": {
"accuracy": 98.3,
"cost": 0.0054,
- "robustness": 0
+ "robustness": 42.9
},
"medium": {
"accuracy": 47.1,
@@ -2273,7 +2273,7 @@
"all": {
"accuracy": 87.4,
"cost": 0.0054,
- "robustness": 0
+ "robustness": 42.9
}
}
},
@@ -2282,12 +2282,12 @@
"easy": {
"accuracy": 97.7,
"cost": 0.0045,
- "robustness": 0
+ "robustness": 33.3
},
"medium": {
"accuracy": 82.4,
"cost": 0.0051,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 15.4,
@@ -2297,7 +2297,7 @@
"all": {
"accuracy": 80.0,
"cost": 0.0048,
- "robustness": 0
+ "robustness": 20.0
}
}
},
@@ -2306,12 +2306,12 @@
"easy": {
"accuracy": 97.7,
"cost": 0.0045,
- "robustness": 0
+ "robustness": 0.0
},
"medium": {
"accuracy": 69.0,
"cost": 0.0057,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 27.3,
@@ -2321,7 +2321,7 @@
"all": {
"accuracy": 77.3,
"cost": 0.0051,
- "robustness": 0
+ "robustness": 0.0
}
}
},
@@ -2330,7 +2330,7 @@
"easy": {
"accuracy": 90.0,
"cost": 0.0033,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 60.0,
@@ -2345,7 +2345,7 @@
"all": {
"accuracy": 84.0,
"cost": 0.0032,
- "robustness": 0
+ "robustness": 50.0
}
}
},
@@ -2354,22 +2354,22 @@
"easy": {
"accuracy": 96.5,
"cost": 0.0049,
- "robustness": 0
+ "robustness": 87.5
},
"medium": {
"accuracy": 69.5,
"cost": 0.0052,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 15.4,
"cost": 0.005,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 88.5,
"cost": 0.005,
- "robustness": 0
+ "robustness": 80.0
}
}
}
@@ -2380,22 +2380,22 @@
"easy": {
"accuracy": 88.9,
"cost": 0.0047,
- "robustness": 0
+ "robustness": 25.0
},
"medium": {
"accuracy": 59.2,
"cost": 0.0104,
- "robustness": 0
+ "robustness": 45.5
},
"hard": {
"accuracy": 16.3,
"cost": 0.0485,
- "robustness": 0
+ "robustness": 60.0
},
"all": {
"accuracy": 62.9,
"cost": 0.016,
- "robustness": 0
+ "robustness": 41.7
}
},
"subcategories": {
@@ -2404,22 +2404,22 @@
"easy": {
"accuracy": 93.2,
"cost": 0.0036,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 76.9,
"cost": 0.0041,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 8.9,
"cost": 0.0043,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 58.4,
"cost": 0.004,
- "robustness": 0
+ "robustness": 80.0
}
}
},
@@ -2428,22 +2428,22 @@
"easy": {
"accuracy": 85.0,
"cost": 0.005,
- "robustness": 0
+ "robustness": 0.0
},
"medium": {
"accuracy": 74.0,
"cost": 0.0178,
- "robustness": 0
+ "robustness": 25.0
},
"hard": {
"accuracy": 18.2,
"cost": 0.1022,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 67.0,
"cost": 0.0301,
- "robustness": 0
+ "robustness": 37.5
}
}
},
@@ -2452,22 +2452,22 @@
"easy": {
"accuracy": 91.9,
"cost": 0.005,
- "robustness": 0
+ "robustness": 20.0
},
"medium": {
"accuracy": 40.5,
"cost": 0.0062,
- "robustness": 0
+ "robustness": 40.0
},
"hard": {
"accuracy": 28.0,
"cost": 0.0057,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 60.4,
"cost": 0.0057,
- "robustness": 0
+ "robustness": 27.3
}
}
}
@@ -2478,22 +2478,22 @@
"easy": {
"accuracy": 96.7,
"cost": 0.0053,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 57.4,
"cost": 0.0041,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 24.6,
"cost": 0.0092,
- "robustness": 0
+ "robustness": 58.3
},
"all": {
"accuracy": 39.4,
"cost": 0.0081,
- "robustness": 0
+ "robustness": 62.1
}
},
"subcategories": {
@@ -2502,22 +2502,22 @@
"easy": {
"accuracy": 96.7,
"cost": 0.0053,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 57.4,
"cost": 0.0041,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 24.6,
"cost": 0.0092,
- "robustness": 0
+ "robustness": 58.3
},
"all": {
"accuracy": 39.4,
"cost": 0.0081,
- "robustness": 0
+ "robustness": 62.1
}
}
}
@@ -2528,22 +2528,22 @@
"easy": {
"accuracy": 91.7,
"cost": 0.0043,
- "robustness": 0
+ "robustness": 47.6
},
"medium": {
"accuracy": 48.2,
"cost": 0.0044,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 7.4,
"cost": 0.0037,
- "robustness": 0
+ "robustness": 20.0
},
"all": {
"accuracy": 61.4,
"cost": 0.0042,
- "robustness": 0
+ "robustness": 43.2
}
},
"subcategories": {
@@ -2552,12 +2552,12 @@
"easy": {
"accuracy": 89.0,
"cost": 0.0025,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 40.0,
"cost": 0.0025,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -2567,7 +2567,7 @@
"all": {
"accuracy": 69.5,
"cost": 0.0023,
- "robustness": 0
+ "robustness": 71.4
}
}
},
@@ -2576,22 +2576,22 @@
"easy": {
"accuracy": 91.9,
"cost": 0.005,
- "robustness": 0
+ "robustness": 27.3
},
"medium": {
"accuracy": 47.5,
"cost": 0.0048,
- "robustness": 0
+ "robustness": 60.0
},
"hard": {
"accuracy": 8.1,
"cost": 0.004,
- "robustness": 0
+ "robustness": 20.0
},
"all": {
"accuracy": 56.6,
"cost": 0.0047,
- "robustness": 0
+ "robustness": 30.8
}
}
},
@@ -2600,7 +2600,7 @@
"easy": {
"accuracy": 95.2,
"cost": 0.0037,
- "robustness": 0
+ "robustness": 75.0
},
"medium": {
"accuracy": 77.8,
@@ -2615,7 +2615,7 @@
"all": {
"accuracy": 90.4,
"cost": 0.0036,
- "robustness": 0
+ "robustness": 75.0
}
}
}
@@ -2626,22 +2626,22 @@
"easy": {
"accuracy": 78.9,
"cost": 0.0144,
- "robustness": 0
+ "robustness": 81.8
},
"medium": {
"accuracy": 43.7,
"cost": 0.0118,
- "robustness": 0
+ "robustness": 55.6
},
"hard": {
"accuracy": 54.1,
"cost": 0.0427,
- "robustness": 0
+ "robustness": 88.0
},
"all": {
"accuracy": 59.1,
"cost": 0.0269,
- "robustness": 0
+ "robustness": 80.0
}
},
"subcategories": {
@@ -2650,22 +2650,22 @@
"easy": {
"accuracy": 78.9,
"cost": 0.0144,
- "robustness": 0
+ "robustness": 81.8
},
"medium": {
"accuracy": 43.7,
"cost": 0.0118,
- "robustness": 0
+ "robustness": 55.6
},
"hard": {
"accuracy": 54.1,
"cost": 0.0427,
- "robustness": 0
+ "robustness": 88.0
},
"all": {
"accuracy": 59.1,
"cost": 0.0269,
- "robustness": 0
+ "robustness": 80.0
}
}
}
@@ -2676,22 +2676,22 @@
"easy": {
"accuracy": 97.0,
"cost": 0.0052,
- "robustness": 0
+ "robustness": 76.9
},
"medium": {
"accuracy": 78.8,
"cost": 0.0116,
- "robustness": 0
+ "robustness": 52.4
},
"hard": {
"accuracy": 26.5,
"cost": 0.0231,
- "robustness": 0
+ "robustness": 86.7
},
"all": {
"accuracy": 78.6,
"cost": 0.0104,
- "robustness": 0
+ "robustness": 71.0
}
},
"subcategories": {
@@ -2700,22 +2700,22 @@
"easy": {
"accuracy": 97.1,
"cost": 0.0054,
- "robustness": 0
+ "robustness": 61.5
},
"medium": {
"accuracy": 82.2,
"cost": 0.013,
- "robustness": 0
+ "robustness": 47.1
},
"hard": {
"accuracy": 30.1,
"cost": 0.0263,
- "robustness": 0
+ "robustness": 84.6
},
"all": {
"accuracy": 77.6,
"cost": 0.0126,
- "robustness": 0
+ "robustness": 62.8
}
}
},
@@ -2724,22 +2724,22 @@
"easy": {
"accuracy": 96.7,
"cost": 0.0048,
- "robustness": 0
+ "robustness": 92.3
},
"medium": {
"accuracy": 63.5,
"cost": 0.0053,
- "robustness": 0
+ "robustness": 75.0
},
"hard": {
"accuracy": 5.6,
"cost": 0.0047,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 81.3,
"cost": 0.0049,
- "robustness": 0
+ "robustness": 89.5
}
}
}
@@ -3684,22 +3684,22 @@
"easy": {
"accuracy": 93.5,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 55.9
},
"medium": {
"accuracy": 61.3,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 58.3
},
"hard": {
"accuracy": 26.5,
"cost": 0.0009,
- "robustness": 0
+ "robustness": 46.8
},
"all": {
"accuracy": 68.1,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 54.1
}
},
"categories": {
@@ -3708,22 +3708,22 @@
"easy": {
"accuracy": 93.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 61.5
},
"medium": {
"accuracy": 70.3,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 68.4
},
"hard": {
"accuracy": 23.9,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 57.1
},
"all": {
"accuracy": 78.3,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 63.1
}
},
"subcategories": {
@@ -3732,22 +3732,22 @@
"easy": {
"accuracy": 92.8,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 64.7
},
"medium": {
"accuracy": 61.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 15.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 76.1,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 60.9
}
}
},
@@ -3756,22 +3756,22 @@
"easy": {
"accuracy": 98.4,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 40.0
},
"medium": {
"accuracy": 83.8,
"cost": 0.001,
- "robustness": 0
+ "robustness": 76.9
},
"hard": {
"accuracy": 47.6,
"cost": 0.0015,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 84.8,
"cost": 0.0009,
- "robustness": 0
+ "robustness": 68.4
}
}
}
@@ -3782,22 +3782,22 @@
"easy": {
"accuracy": 92.4,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 60.9
},
"medium": {
"accuracy": 51.4,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 14.3
},
"hard": {
"accuracy": 7.9,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 42.9
},
"all": {
"accuracy": 69.5,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 48.6
}
},
"subcategories": {
@@ -3806,22 +3806,22 @@
"easy": {
"accuracy": 86.5,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 62.5,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 11.1,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 33.3
},
"all": {
"accuracy": 50.8,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 62.5
}
}
},
@@ -3830,7 +3830,7 @@
"easy": {
"accuracy": 95.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 83.3
},
"medium": {
"accuracy": 77.1,
@@ -3840,12 +3840,12 @@
"hard": {
"accuracy": 5.6,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 81.8,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 87.5
}
}
},
@@ -3854,22 +3854,22 @@
"easy": {
"accuracy": 91.8,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 42.9
},
"medium": {
"accuracy": 31.8,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 69.4,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 28.6
}
}
}
@@ -3880,22 +3880,22 @@
"easy": {
"accuracy": 93.3,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 33.3
},
"medium": {
"accuracy": 65.1,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 75.0
},
"hard": {
"accuracy": 6.7,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 14.3
},
"all": {
"accuracy": 64.7,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 43.2
}
},
"subcategories": {
@@ -3909,17 +3909,17 @@
"medium": {
"accuracy": 33.3,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 2.9,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 18.2,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 33.3
}
}
},
@@ -3928,22 +3928,22 @@
"easy": {
"accuracy": 93.4,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 25.0
},
"medium": {
"accuracy": 70.6,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 83.3
},
"hard": {
"accuracy": 7.5,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 33.3
},
"all": {
"accuracy": 68.5,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 42.9
}
}
},
@@ -3952,7 +3952,7 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 44.4,
@@ -3962,12 +3962,12 @@
"hard": {
"accuracy": 0.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 83.6,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 50.0
}
}
},
@@ -3976,22 +3976,22 @@
"easy": {
"accuracy": 87.7,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 33.3
},
"medium": {
"accuracy": 62.9,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 75.0
},
"hard": {
"accuracy": 10.0,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 63.1,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 44.4
}
}
}
@@ -4002,22 +4002,22 @@
"easy": {
"accuracy": 97.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 59.0
},
"medium": {
"accuracy": 75.0,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 61.9
},
"hard": {
"accuracy": 27.8,
"cost": 0.0012,
- "robustness": 0
+ "robustness": 55.6
},
"all": {
"accuracy": 80.6,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 59.4
}
},
"subcategories": {
@@ -4026,7 +4026,7 @@
"easy": {
"accuracy": 96.7,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 20.0,
@@ -4036,12 +4036,12 @@
"hard": {
"accuracy": 3.4,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 33.3
},
"all": {
"accuracy": 31.6,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 75.0
}
}
},
@@ -4050,22 +4050,22 @@
"easy": {
"accuracy": 96.9,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 36.4
},
"medium": {
"accuracy": 80.4,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 81.8
},
"hard": {
"accuracy": 40.4,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 80.0
},
"all": {
"accuracy": 81.7,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 63.0
}
}
},
@@ -4074,7 +4074,7 @@
"easy": {
"accuracy": 98.3,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 57.1
},
"medium": {
"accuracy": 64.7,
@@ -4089,7 +4089,7 @@
"all": {
"accuracy": 89.5,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 57.1
}
}
},
@@ -4098,12 +4098,12 @@
"easy": {
"accuracy": 93.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 91.2,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 30.8,
@@ -4113,7 +4113,7 @@
"all": {
"accuracy": 83.3,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 40.0
}
}
},
@@ -4122,12 +4122,12 @@
"easy": {
"accuracy": 97.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 73.8,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 54.5,
@@ -4137,7 +4137,7 @@
"all": {
"accuracy": 82.5,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 60.0
}
}
},
@@ -4146,7 +4146,7 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 60.0,
@@ -4161,7 +4161,7 @@
"all": {
"accuracy": 92.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 50.0
}
}
},
@@ -4170,22 +4170,22 @@
"easy": {
"accuracy": 96.9,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 62.5
},
"medium": {
"accuracy": 71.2,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 30.8,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 89.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 53.3
}
}
}
@@ -4196,22 +4196,22 @@
"easy": {
"accuracy": 90.9,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 75.0
},
"medium": {
"accuracy": 52.9,
"cost": 0.0012,
- "robustness": 0
+ "robustness": 45.5
},
"hard": {
"accuracy": 10.9,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 40.0
},
"all": {
"accuracy": 60.3,
"cost": 0.001,
- "robustness": 0
+ "robustness": 54.2
}
},
"subcategories": {
@@ -4220,22 +4220,22 @@
"easy": {
"accuracy": 94.9,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 51.3,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 5.4,
"cost": 0.0009,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 51.3,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 80.0
}
}
},
@@ -4244,22 +4244,22 @@
"easy": {
"accuracy": 87.9,
"cost": 0.0009,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 52.0,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 15.2,
"cost": 0.0025,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 60.5,
"cost": 0.0015,
- "robustness": 0
+ "robustness": 12.5
}
}
},
@@ -4268,22 +4268,22 @@
"easy": {
"accuracy": 92.9,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 80.0
},
"medium": {
"accuracy": 54.3,
"cost": 0.0011,
- "robustness": 0
+ "robustness": 80.0
},
"hard": {
"accuracy": 12.0,
"cost": 0.0012,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 65.8,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 72.7
}
}
}
@@ -4294,22 +4294,22 @@
"easy": {
"accuracy": 93.1,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 45.1,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 24.8,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 58.3
},
"all": {
"accuracy": 38.0,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 62.1
}
},
"subcategories": {
@@ -4318,22 +4318,22 @@
"easy": {
"accuracy": 93.1,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 45.1,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 24.8,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 58.3
},
"all": {
"accuracy": 38.0,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 62.1
}
}
}
@@ -4344,22 +4344,22 @@
"easy": {
"accuracy": 97.0,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 57.1
},
"medium": {
"accuracy": 55.5,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 4.0,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 40.0
},
"all": {
"accuracy": 65.3,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 54.1
}
},
"subcategories": {
@@ -4368,12 +4368,12 @@
"easy": {
"accuracy": 97.3,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 42.1,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 8.3,
@@ -4383,7 +4383,7 @@
"all": {
"accuracy": 76.9,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 71.4
}
}
},
@@ -4392,22 +4392,22 @@
"easy": {
"accuracy": 96.4,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 45.5
},
"medium": {
"accuracy": 56.6,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 60.0
},
"hard": {
"accuracy": 3.7,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 40.0
},
"all": {
"accuracy": 60.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 46.2
}
}
},
@@ -4416,7 +4416,7 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 75.0
},
"medium": {
"accuracy": 66.7,
@@ -4431,7 +4431,7 @@
"all": {
"accuracy": 92.3,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 75.0
}
}
}
@@ -4442,22 +4442,22 @@
"easy": {
"accuracy": 84.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 45.5
},
"medium": {
"accuracy": 41.2,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 55.6
},
"hard": {
"accuracy": 56.4,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 28.0
},
"all": {
"accuracy": 61.3,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 37.8
}
},
"subcategories": {
@@ -4466,22 +4466,22 @@
"easy": {
"accuracy": 84.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 45.5
},
"medium": {
"accuracy": 41.2,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 55.6
},
"hard": {
"accuracy": 56.4,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 28.0
},
"all": {
"accuracy": 61.3,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 37.8
}
}
}
@@ -4492,22 +4492,22 @@
"easy": {
"accuracy": 91.9,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 58.2,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 52.4
},
"hard": {
"accuracy": 31.8,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 73.3
},
"all": {
"accuracy": 70.3,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 56.5
}
},
"subcategories": {
@@ -4516,22 +4516,22 @@
"easy": {
"accuracy": 89.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 53.8
},
"medium": {
"accuracy": 58.6,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 64.7
},
"hard": {
"accuracy": 36.4,
"cost": 0.0021,
- "robustness": 0
+ "robustness": 76.9
},
"all": {
"accuracy": 67.0,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 65.1
}
}
},
@@ -4540,22 +4540,22 @@
"easy": {
"accuracy": 95.3,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 46.2
},
"medium": {
"accuracy": 56.5,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 5.6,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 78.8,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 36.8
}
}
}
@@ -5500,22 +5500,22 @@
"easy": {
"accuracy": 96.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 55.6
},
"medium": {
"accuracy": 57.3,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 62.8
},
"hard": {
"accuracy": 19.6,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 69.7
},
"all": {
"accuracy": 66.9,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 61.2
}
},
"categories": {
@@ -5524,22 +5524,22 @@
"easy": {
"accuracy": 97.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 73.1
},
"medium": {
"accuracy": 65.6,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 16.3,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 93.3
},
"all": {
"accuracy": 72.9,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 75.8
}
},
"subcategories": {
@@ -5548,22 +5548,22 @@
"easy": {
"accuracy": 97.1,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 76.9
},
"medium": {
"accuracy": 50.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 78.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 78.9
}
}
},
@@ -5572,22 +5572,22 @@
"easy": {
"accuracy": 98.1,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 69.2
},
"medium": {
"accuracy": 69.0,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 58.8
},
"hard": {
"accuracy": 19.1,
"cost": 0.0009,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 70.7,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 74.4
}
}
}
@@ -5598,22 +5598,22 @@
"easy": {
"accuracy": 96.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 40.7
},
"medium": {
"accuracy": 48.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 30.0
},
"hard": {
"accuracy": 1.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 42.9
},
"all": {
"accuracy": 71.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 38.6
}
},
"subcategories": {
@@ -5622,22 +5622,22 @@
"easy": {
"accuracy": 96.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 14.3
},
"medium": {
"accuracy": 32.9,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 20.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 72.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 14.3
}
}
},
@@ -5646,22 +5646,22 @@
"easy": {
"accuracy": 94.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 45.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 2.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 44.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
}
}
},
@@ -5670,7 +5670,7 @@
"easy": {
"accuracy": 95.9,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 68.8,
@@ -5680,12 +5680,12 @@
"hard": {
"accuracy": 0.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 79.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
}
}
},
@@ -5694,12 +5694,12 @@
"easy": {
"accuracy": 98.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 63.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 0.0,
@@ -5709,7 +5709,7 @@
"all": {
"accuracy": 81.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 85.7
}
}
}
@@ -5720,22 +5720,22 @@
"easy": {
"accuracy": 96.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 88.9
},
"medium": {
"accuracy": 53.8,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 2.7,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 57.1
},
"all": {
"accuracy": 61.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 75.7
}
},
"subcategories": {
@@ -5744,22 +5744,22 @@
"easy": {
"accuracy": 97.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 83.3
},
"medium": {
"accuracy": 54.6,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 83.3
},
"hard": {
"accuracy": 1.2,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 62.9,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 81.0
}
}
},
@@ -5768,22 +5768,22 @@
"easy": {
"accuracy": 93.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 52.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 75.0
},
"hard": {
"accuracy": 6.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 60.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 88.9
}
}
},
@@ -5797,17 +5797,17 @@
"medium": {
"accuracy": 66.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 2.9,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 29.1,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 0.0
}
}
},
@@ -5816,7 +5816,7 @@
"easy": {
"accuracy": 95.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 22.2,
@@ -5826,12 +5826,12 @@
"hard": {
"accuracy": 0.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 77.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 75.0
}
}
}
@@ -5842,22 +5842,22 @@
"easy": {
"accuracy": 86.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 36.4
},
"medium": {
"accuracy": 34.1,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 51.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 76.0
},
"all": {
"accuracy": 57.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 64.4
}
},
"subcategories": {
@@ -5866,22 +5866,22 @@
"easy": {
"accuracy": 86.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 36.4
},
"medium": {
"accuracy": 34.1,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 51.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 76.0
},
"all": {
"accuracy": 57.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 64.4
}
}
}
@@ -5892,22 +5892,22 @@
"easy": {
"accuracy": 98.1,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 41.0
},
"medium": {
"accuracy": 66.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 57.1
},
"hard": {
"accuracy": 4.6,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 77.8
},
"all": {
"accuracy": 75.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.7
}
},
"subcategories": {
@@ -5916,22 +5916,22 @@
"easy": {
"accuracy": 97.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 45.5
},
"medium": {
"accuracy": 67.2,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 72.7
},
"hard": {
"accuracy": 5.6,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 71.2,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 66.7
}
}
},
@@ -5940,22 +5940,22 @@
"easy": {
"accuracy": 99.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 37.5
},
"medium": {
"accuracy": 64.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 33.3
},
"hard": {
"accuracy": 0.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 89.1,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 33.3
}
}
},
@@ -5964,7 +5964,7 @@
"easy": {
"accuracy": 98.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 42.9
},
"medium": {
"accuracy": 70.6,
@@ -5979,7 +5979,7 @@
"all": {
"accuracy": 88.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 42.9
}
}
},
@@ -5988,12 +5988,12 @@
"easy": {
"accuracy": 97.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 33.3
},
"medium": {
"accuracy": 73.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 0.0,
@@ -6003,7 +6003,7 @@
"all": {
"accuracy": 74.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 40.0
}
}
},
@@ -6012,12 +6012,12 @@
"easy": {
"accuracy": 95.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 33.3
},
"medium": {
"accuracy": 69.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 9.1,
@@ -6027,7 +6027,7 @@
"all": {
"accuracy": 74.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 40.0
}
}
},
@@ -6036,7 +6036,7 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 60.0,
@@ -6051,7 +6051,7 @@
"all": {
"accuracy": 92.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
}
}
},
@@ -6060,7 +6060,7 @@
"easy": {
"accuracy": 96.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 40.0
},
"medium": {
"accuracy": 40.0,
@@ -6070,12 +6070,12 @@
"hard": {
"accuracy": 5.1,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 36.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
}
}
}
@@ -6086,22 +6086,22 @@
"easy": {
"accuracy": 94.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 62.8
},
"medium": {
"accuracy": 53.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 71.4
},
"hard": {
"accuracy": 5.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 71.4
},
"all": {
"accuracy": 72.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.2
}
},
"subcategories": {
@@ -6110,22 +6110,22 @@
"easy": {
"accuracy": 97.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 68.9,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 76.9
},
"hard": {
"accuracy": 7.1,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 71.9,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 78.9
}
}
},
@@ -6134,22 +6134,22 @@
"easy": {
"accuracy": 93.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 58.8
},
"medium": {
"accuracy": 43.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 57.1
},
"hard": {
"accuracy": 5.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 83.3
},
"all": {
"accuracy": 71.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 61.7
}
}
},
@@ -6158,12 +6158,12 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 45.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -6173,7 +6173,7 @@
"all": {
"accuracy": 89.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 60.0
}
}
}
@@ -6184,22 +6184,22 @@
"easy": {
"accuracy": 94.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 49.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 72.7
},
"hard": {
"accuracy": 3.4,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 80.0
},
"all": {
"accuracy": 58.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
}
},
"subcategories": {
@@ -6208,22 +6208,22 @@
"easy": {
"accuracy": 90.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 53.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 6.1,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 59.8,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 87.5
}
}
},
@@ -6232,22 +6232,22 @@
"easy": {
"accuracy": 98.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 60.0
},
"medium": {
"accuracy": 39.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 80.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 59.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 72.7
}
}
},
@@ -6256,22 +6256,22 @@
"easy": {
"accuracy": 98.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 0.0
},
"medium": {
"accuracy": 66.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 1.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 55.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 20.0
}
}
}
@@ -6282,22 +6282,22 @@
"easy": {
"accuracy": 99.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 77.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 27.2,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 58.3
},
"all": {
"accuracy": 43.7,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 58.6
}
},
"subcategories": {
@@ -6306,22 +6306,22 @@
"easy": {
"accuracy": 99.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 77.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 27.2,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 58.3
},
"all": {
"accuracy": 43.7,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 58.6
}
}
}
@@ -6332,22 +6332,22 @@
"easy": {
"accuracy": 97.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 47.8
},
"medium": {
"accuracy": 64.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 6.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 60.0
},
"all": {
"accuracy": 69.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 53.8
}
},
"subcategories": {
@@ -6356,12 +6356,12 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 25.0
},
"medium": {
"accuracy": 72.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 33.3,
@@ -6371,7 +6371,7 @@
"all": {
"accuracy": 89.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 33.3
}
}
},
@@ -6380,22 +6380,22 @@
"easy": {
"accuracy": 96.9,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 72.7
},
"medium": {
"accuracy": 62.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 60.0
},
"hard": {
"accuracy": 3.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 60.0
},
"all": {
"accuracy": 61.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 65.4
}
}
},
@@ -6404,7 +6404,7 @@
"easy": {
"accuracy": 92.9,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 25.0
},
"medium": {
"accuracy": 66.7,
@@ -6419,7 +6419,7 @@
"all": {
"accuracy": 86.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 25.0
}
}
}
@@ -6432,22 +6432,22 @@
"easy": {
"accuracy": 93.5,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 49.0
},
"medium": {
"accuracy": 46.4,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 56.6
},
"hard": {
"accuracy": 17.4,
"cost": 0.0009,
- "robustness": 0
+ "robustness": 42.2
},
"all": {
"accuracy": 62.0,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 49.3
}
},
"categories": {
@@ -6456,22 +6456,22 @@
"easy": {
"accuracy": 94.9,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 38.5
},
"medium": {
"accuracy": 51.3,
"cost": 0.0009,
- "robustness": 0
+ "robustness": 47.6
},
"hard": {
"accuracy": 6.5,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 33.3
},
"all": {
"accuracy": 65.1,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 40.3
}
},
"subcategories": {
@@ -6480,22 +6480,22 @@
"easy": {
"accuracy": 96.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 38.5
},
"medium": {
"accuracy": 45.9,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 25.0
},
"hard": {
"accuracy": 2.8,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 76.8,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 36.8
}
}
},
@@ -6504,22 +6504,22 @@
"easy": {
"accuracy": 94.3,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 38.5
},
"medium": {
"accuracy": 52.5,
"cost": 0.0011,
- "robustness": 0
+ "robustness": 52.9
},
"hard": {
"accuracy": 7.2,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 30.8
},
"all": {
"accuracy": 60.5,
"cost": 0.001,
- "robustness": 0
+ "robustness": 41.9
}
}
}
@@ -6530,22 +6530,22 @@
"easy": {
"accuracy": 93.1,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 40.7
},
"medium": {
"accuracy": 47.9,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 40.0
},
"hard": {
"accuracy": 3.7,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 69.1,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 34.1
}
},
"subcategories": {
@@ -6554,22 +6554,22 @@
"easy": {
"accuracy": 89.7,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 42.9
},
"medium": {
"accuracy": 38.8,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 70.3,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 28.6
}
}
},
@@ -6578,22 +6578,22 @@
"easy": {
"accuracy": 97.3,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 33.3
},
"medium": {
"accuracy": 47.5,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 4.4,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 46.7,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 25.0
}
}
},
@@ -6602,7 +6602,7 @@
"easy": {
"accuracy": 98.3,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 16.7
},
"medium": {
"accuracy": 56.2,
@@ -6612,12 +6612,12 @@
"hard": {
"accuracy": 5.6,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 78.6,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 12.5
}
}
},
@@ -6626,12 +6626,12 @@
"easy": {
"accuracy": 90.8,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 75.0
},
"medium": {
"accuracy": 57.9,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -6641,7 +6641,7 @@
"all": {
"accuracy": 75.0,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 85.7
}
}
}
@@ -6652,22 +6652,22 @@
"easy": {
"accuracy": 93.3,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 47.4,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 5.4,
"cost": 0.001,
- "robustness": 0
+ "robustness": 57.1
},
"all": {
"accuracy": 58.0,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 56.8
}
},
"subcategories": {
@@ -6676,22 +6676,22 @@
"easy": {
"accuracy": 94.0,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 58.3
},
"medium": {
"accuracy": 48.5,
"cost": 0.0009,
- "robustness": 0
+ "robustness": 83.3
},
"hard": {
"accuracy": 7.5,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 60.3,
"cost": 0.0009,
- "robustness": 0
+ "robustness": 66.7
}
}
},
@@ -6700,22 +6700,22 @@
"easy": {
"accuracy": 89.2,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 33.3
},
"medium": {
"accuracy": 50.8,
"cost": 0.0009,
- "robustness": 0
+ "robustness": 25.0
},
"hard": {
"accuracy": 6.7,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 58.2,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 33.3
}
}
},
@@ -6729,17 +6729,17 @@
"medium": {
"accuracy": 44.4,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 20.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 66.7
}
}
},
@@ -6748,7 +6748,7 @@
"easy": {
"accuracy": 95.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 33.3
},
"medium": {
"accuracy": 11.1,
@@ -6758,12 +6758,12 @@
"hard": {
"accuracy": 0.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 75.4,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 50.0
}
}
}
@@ -6774,22 +6774,22 @@
"easy": {
"accuracy": 84.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 45.5
},
"medium": {
"accuracy": 26.3,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 33.3
},
"hard": {
"accuracy": 51.9,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 36.0
},
"all": {
"accuracy": 55.6,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 37.8
}
},
"subcategories": {
@@ -6798,22 +6798,22 @@
"easy": {
"accuracy": 84.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 45.5
},
"medium": {
"accuracy": 26.3,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 33.3
},
"hard": {
"accuracy": 51.9,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 36.0
},
"all": {
"accuracy": 55.6,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 37.8
}
}
}
@@ -6824,22 +6824,22 @@
"easy": {
"accuracy": 94.8,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 51.3
},
"medium": {
"accuracy": 55.8,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 61.9
},
"hard": {
"accuracy": 1.0,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 44.4
},
"all": {
"accuracy": 69.8,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 53.6
}
},
"subcategories": {
@@ -6848,22 +6848,22 @@
"easy": {
"accuracy": 91.2,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 63.6
},
"medium": {
"accuracy": 57.6,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 72.7
},
"hard": {
"accuracy": 1.1,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 40.0
},
"all": {
"accuracy": 63.9,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 63.0
}
}
},
@@ -6872,22 +6872,22 @@
"easy": {
"accuracy": 98.1,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 25.0
},
"medium": {
"accuracy": 61.0,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 0.0,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 87.6,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 40.0
}
}
},
@@ -6896,7 +6896,7 @@
"easy": {
"accuracy": 95.7,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 28.6
},
"medium": {
"accuracy": 41.2,
@@ -6911,7 +6911,7 @@
"all": {
"accuracy": 83.2,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 28.6
}
}
},
@@ -6920,12 +6920,12 @@
"easy": {
"accuracy": 88.4,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 55.9,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 0.0,
@@ -6935,7 +6935,7 @@
"all": {
"accuracy": 63.3,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 60.0
}
}
},
@@ -6944,12 +6944,12 @@
"easy": {
"accuracy": 97.7,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 57.1,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 0.0,
@@ -6959,7 +6959,7 @@
"all": {
"accuracy": 69.1,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 40.0
}
}
},
@@ -6968,7 +6968,7 @@
"easy": {
"accuracy": 95.0,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 40.0,
@@ -6983,7 +6983,7 @@
"all": {
"accuracy": 84.0,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 50.0
}
}
},
@@ -6992,7 +6992,7 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 80.0
},
"medium": {
"accuracy": 36.0,
@@ -7002,12 +7002,12 @@
"hard": {
"accuracy": 1.7,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 35.1,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 75.0
}
}
}
@@ -7018,22 +7018,22 @@
"easy": {
"accuracy": 93.3,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 48.8
},
"medium": {
"accuracy": 47.1,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 71.4
},
"hard": {
"accuracy": 5.1,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 71.4
},
"all": {
"accuracy": 69.6,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 57.7
}
},
"subcategories": {
@@ -7042,22 +7042,22 @@
"easy": {
"accuracy": 94.4,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 80.0
},
"medium": {
"accuracy": 57.5,
"cost": 0.0009,
- "robustness": 0
+ "robustness": 69.2
},
"hard": {
"accuracy": 7.1,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 65.1,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 73.7
}
}
},
@@ -7066,22 +7066,22 @@
"easy": {
"accuracy": 92.6,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 47.1
},
"medium": {
"accuracy": 41.3,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 71.4
},
"hard": {
"accuracy": 4.4,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 70.1,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 53.2
}
}
},
@@ -7090,12 +7090,12 @@
"easy": {
"accuracy": 98.3,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 25.0
},
"medium": {
"accuracy": 18.2,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -7105,7 +7105,7 @@
"all": {
"accuracy": 83.6,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 40.0
}
}
}
@@ -7116,22 +7116,22 @@
"easy": {
"accuracy": 91.9,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 32.2,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 45.5
},
"hard": {
"accuracy": 1.4,
"cost": 0.001,
- "robustness": 0
+ "robustness": 20.0
},
"all": {
"accuracy": 51.1,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 58.3
}
},
"subcategories": {
@@ -7140,22 +7140,22 @@
"easy": {
"accuracy": 88.6,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 41.0,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 25.0
},
"hard": {
"accuracy": 3.0,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 54.6,
"cost": 0.0011,
- "robustness": 0
+ "robustness": 37.5
}
}
},
@@ -7164,22 +7164,22 @@
"easy": {
"accuracy": 97.0,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 25.9,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 60.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0005,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 52.5,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 72.7
}
}
},
@@ -7188,22 +7188,22 @@
"easy": {
"accuracy": 91.5,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 28.2,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 42.2,
"cost": 0.0004,
- "robustness": 0
+ "robustness": 60.0
}
}
}
@@ -7214,22 +7214,22 @@
"easy": {
"accuracy": 95.6,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 33.3
},
"medium": {
"accuracy": 48.9,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 25.3,
"cost": 0.0012,
- "robustness": 0
+ "robustness": 58.3
},
"all": {
"accuracy": 38.9,
"cost": 0.001,
- "robustness": 0
+ "robustness": 58.6
}
},
"subcategories": {
@@ -7238,22 +7238,22 @@
"easy": {
"accuracy": 95.6,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 33.3
},
"medium": {
"accuracy": 48.9,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 25.3,
"cost": 0.0012,
- "robustness": 0
+ "robustness": 58.3
},
"all": {
"accuracy": 38.9,
"cost": 0.001,
- "robustness": 0
+ "robustness": 58.6
}
}
}
@@ -7264,22 +7264,22 @@
"easy": {
"accuracy": 94.9,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 52.2
},
"medium": {
"accuracy": 44.6,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 2.0,
"cost": 0.0007,
- "robustness": 0
+ "robustness": 40.0
},
"all": {
"accuracy": 62.4,
"cost": 0.0006,
- "robustness": 0
+ "robustness": 51.3
}
},
"subcategories": {
@@ -7288,12 +7288,12 @@
"easy": {
"accuracy": 96.3,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 37.5
},
"medium": {
"accuracy": 44.8,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 0.0,
@@ -7303,7 +7303,7 @@
"all": {
"accuracy": 78.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 33.3
}
}
},
@@ -7312,22 +7312,22 @@
"easy": {
"accuracy": 94.6,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 72.7
},
"medium": {
"accuracy": 44.6,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 80.0
},
"hard": {
"accuracy": 2.2,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 40.0
},
"all": {
"accuracy": 55.4,
"cost": 0.0008,
- "robustness": 0
+ "robustness": 61.5
}
}
},
@@ -7336,7 +7336,7 @@
"easy": {
"accuracy": 92.9,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 25.0
},
"medium": {
"accuracy": 44.4,
@@ -7351,7 +7351,7 @@
"all": {
"accuracy": 82.7,
"cost": 0.0003,
- "robustness": 0
+ "robustness": 25.0
}
}
}
@@ -8296,22 +8296,22 @@
"easy": {
"accuracy": 89.3,
"cost": 0.0031,
- "robustness": 0
+ "robustness": 83.3
},
"medium": {
"accuracy": 42.2,
"cost": 0.0051,
- "robustness": 0
+ "robustness": 84.1
},
"hard": {
"accuracy": 17.1,
"cost": 0.0057,
- "robustness": 0
+ "robustness": 82.6
},
"all": {
"accuracy": 58.7,
"cost": 0.0043,
- "robustness": 0
+ "robustness": 83.3
}
},
"categories": {
@@ -8320,22 +8320,22 @@
"easy": {
"accuracy": 87.0,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 88.5
},
"medium": {
"accuracy": 39.6,
"cost": 0.0067,
- "robustness": 0
+ "robustness": 85.7
},
"hard": {
"accuracy": 1.6,
"cost": 0.0111,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 56.4,
"cost": 0.0053,
- "robustness": 0
+ "robustness": 82.3
}
},
"subcategories": {
@@ -8344,22 +8344,22 @@
"easy": {
"accuracy": 81.5,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 76.9
},
"medium": {
"accuracy": 36.5,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 75.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0012,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 64.4,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 73.7
}
}
},
@@ -8368,22 +8368,22 @@
"easy": {
"accuracy": 90.7,
"cost": 0.0031,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 40.3,
"cost": 0.0079,
- "robustness": 0
+ "robustness": 88.2
},
"hard": {
"accuracy": 1.9,
"cost": 0.0128,
- "robustness": 0
+ "robustness": 69.2
},
"all": {
"accuracy": 53.3,
"cost": 0.0069,
- "robustness": 0
+ "robustness": 86.0
}
}
}
@@ -8394,22 +8394,22 @@
"easy": {
"accuracy": 89.0,
"cost": 0.0033,
- "robustness": 0
+ "robustness": 96.3
},
"medium": {
"accuracy": 38.4,
"cost": 0.0038,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 3.7,
"cost": 0.0026,
- "robustness": 0
+ "robustness": 85.7
},
"all": {
"accuracy": 63.9,
"cost": 0.0034,
- "robustness": 0
+ "robustness": 95.5
}
},
"subcategories": {
@@ -8418,22 +8418,22 @@
"easy": {
"accuracy": 84.3,
"cost": 0.0032,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 30.6,
"cost": 0.004,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 7.7,
"cost": 0.0034,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 64.7,
"cost": 0.0035,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -8442,22 +8442,22 @@
"easy": {
"accuracy": 97.3,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 37.5,
"cost": 0.0032,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 4.4,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 43.4,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 87.5
}
}
},
@@ -8466,7 +8466,7 @@
"easy": {
"accuracy": 91.7,
"cost": 0.0035,
- "robustness": 0
+ "robustness": 83.3
},
"medium": {
"accuracy": 56.2,
@@ -8476,12 +8476,12 @@
"hard": {
"accuracy": 0.0,
"cost": 0.0036,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 73.8,
"cost": 0.0035,
- "robustness": 0
+ "robustness": 87.5
}
}
},
@@ -8490,12 +8490,12 @@
"easy": {
"accuracy": 92.3,
"cost": 0.0036,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 34.2,
"cost": 0.0041,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -8505,7 +8505,7 @@
"all": {
"accuracy": 67.6,
"cost": 0.0038,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -8516,22 +8516,22 @@
"easy": {
"accuracy": 87.9,
"cost": 0.0037,
- "robustness": 0
+ "robustness": 61.1
},
"medium": {
"accuracy": 41.5,
"cost": 0.0058,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 2.0,
"cost": 0.0065,
- "robustness": 0
+ "robustness": 85.7
},
"all": {
"accuracy": 52.9,
"cost": 0.0051,
- "robustness": 0
+ "robustness": 67.6
}
},
"subcategories": {
@@ -8540,22 +8540,22 @@
"easy": {
"accuracy": 84.7,
"cost": 0.004,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 40.5,
"cost": 0.0068,
- "robustness": 0
+ "robustness": 83.3
},
"hard": {
"accuracy": 3.8,
"cost": 0.0097,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 52.6,
"cost": 0.0062,
- "robustness": 0
+ "robustness": 61.9
}
}
},
@@ -8564,22 +8564,22 @@
"easy": {
"accuracy": 90.8,
"cost": 0.0044,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 44.4,
"cost": 0.005,
- "robustness": 0
+ "robustness": 25.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0048,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 55.1,
"cost": 0.0047,
- "robustness": 0
+ "robustness": 66.7
}
}
},
@@ -8593,17 +8593,17 @@
"medium": {
"accuracy": 50.0,
"cost": 0.0011,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0011,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 21.8,
"cost": 0.0011,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -8612,7 +8612,7 @@
"easy": {
"accuracy": 95.7,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 22.2,
@@ -8622,12 +8622,12 @@
"hard": {
"accuracy": 0.0,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 77.0,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 75.0
}
}
}
@@ -8638,22 +8638,22 @@
"easy": {
"accuracy": 86.1,
"cost": 0.0021,
- "robustness": 0
+ "robustness": 81.8
},
"medium": {
"accuracy": 46.7,
"cost": 0.0034,
- "robustness": 0
+ "robustness": 88.9
},
"hard": {
"accuracy": 49.4,
"cost": 0.0037,
- "robustness": 0
+ "robustness": 92.0
},
"all": {
"accuracy": 59.7,
"cost": 0.0032,
- "robustness": 0
+ "robustness": 88.9
}
},
"subcategories": {
@@ -8662,22 +8662,22 @@
"easy": {
"accuracy": 86.1,
"cost": 0.0021,
- "robustness": 0
+ "robustness": 81.8
},
"medium": {
"accuracy": 46.7,
"cost": 0.0034,
- "robustness": 0
+ "robustness": 88.9
},
"hard": {
"accuracy": 49.4,
"cost": 0.0037,
- "robustness": 0
+ "robustness": 92.0
},
"all": {
"accuracy": 59.7,
"cost": 0.0032,
- "robustness": 0
+ "robustness": 88.9
}
}
}
@@ -8688,22 +8688,22 @@
"easy": {
"accuracy": 91.3,
"cost": 0.0029,
- "robustness": 0
+ "robustness": 74.4
},
"medium": {
"accuracy": 39.6,
"cost": 0.0054,
- "robustness": 0
+ "robustness": 71.4
},
"hard": {
"accuracy": 3.6,
"cost": 0.0072,
- "robustness": 0
+ "robustness": 77.8
},
"all": {
"accuracy": 63.2,
"cost": 0.0043,
- "robustness": 0
+ "robustness": 73.9
}
},
"subcategories": {
@@ -8712,22 +8712,22 @@
"easy": {
"accuracy": 89.3,
"cost": 0.0039,
- "robustness": 0
+ "robustness": 90.9
},
"medium": {
"accuracy": 40.4,
"cost": 0.0069,
- "robustness": 0
+ "robustness": 63.6
},
"hard": {
"accuracy": 4.5,
"cost": 0.013,
- "robustness": 0
+ "robustness": 60.0
},
"all": {
"accuracy": 56.4,
"cost": 0.0065,
- "robustness": 0
+ "robustness": 74.1
}
}
},
@@ -8736,22 +8736,22 @@
"easy": {
"accuracy": 91.5,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 25.0
},
"medium": {
"accuracy": 22.0,
"cost": 0.001,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 15.4,
"cost": 0.0009,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 76.1,
"cost": 0.0012,
- "robustness": 0
+ "robustness": 46.7
}
}
},
@@ -8760,7 +8760,7 @@
"easy": {
"accuracy": 93.2,
"cost": 0.0043,
- "robustness": 0
+ "robustness": 85.7
},
"medium": {
"accuracy": 29.4,
@@ -8775,7 +8775,7 @@
"all": {
"accuracy": 79.7,
"cost": 0.0045,
- "robustness": 0
+ "robustness": 85.7
}
}
},
@@ -8784,12 +8784,12 @@
"easy": {
"accuracy": 95.3,
"cost": 0.0044,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 52.9,
"cost": 0.0059,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -8799,7 +8799,7 @@
"all": {
"accuracy": 65.6,
"cost": 0.005,
- "robustness": 0
+ "robustness": 80.0
}
}
},
@@ -8808,12 +8808,12 @@
"easy": {
"accuracy": 90.9,
"cost": 0.003,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 50.0,
"cost": 0.0056,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -8823,7 +8823,7 @@
"all": {
"accuracy": 62.9,
"cost": 0.0043,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -8832,7 +8832,7 @@
"easy": {
"accuracy": 85.0,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 60.0,
@@ -8847,7 +8847,7 @@
"all": {
"accuracy": 80.0,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 50.0
}
}
},
@@ -8856,7 +8856,7 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 40.0,
@@ -8866,12 +8866,12 @@
"hard": {
"accuracy": 1.7,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 36.0,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -8882,22 +8882,22 @@
"easy": {
"accuracy": 87.2,
"cost": 0.0039,
- "robustness": 0
+ "robustness": 83.7
},
"medium": {
"accuracy": 33.8,
"cost": 0.0051,
- "robustness": 0
+ "robustness": 90.5
},
"hard": {
"accuracy": 3.2,
"cost": 0.0047,
- "robustness": 0
+ "robustness": 57.1
},
"all": {
"accuracy": 61.8,
"cost": 0.0043,
- "robustness": 0
+ "robustness": 83.1
}
},
"subcategories": {
@@ -8906,22 +8906,22 @@
"easy": {
"accuracy": 84.9,
"cost": 0.0039,
- "robustness": 0
+ "robustness": 80.0
},
"medium": {
"accuracy": 35.3,
"cost": 0.0075,
- "robustness": 0
+ "robustness": 92.3
},
"hard": {
"accuracy": 0.0,
"cost": 0.0073,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 49.6,
"cost": 0.0061,
- "robustness": 0
+ "robustness": 84.2
}
}
},
@@ -8930,22 +8930,22 @@
"easy": {
"accuracy": 87.4,
"cost": 0.0042,
- "robustness": 0
+ "robustness": 85.3
},
"medium": {
"accuracy": 33.5,
"cost": 0.0036,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 4.4,
"cost": 0.0038,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 64.8,
"cost": 0.004,
- "robustness": 0
+ "robustness": 85.1
}
}
},
@@ -8954,12 +8954,12 @@
"easy": {
"accuracy": 90.0,
"cost": 0.0012,
- "robustness": 0
+ "robustness": 75.0
},
"medium": {
"accuracy": 18.2,
"cost": 0.0009,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 0.0,
@@ -8969,7 +8969,7 @@
"all": {
"accuracy": 76.7,
"cost": 0.0012,
- "robustness": 0
+ "robustness": 60.0
}
}
}
@@ -8980,22 +8980,22 @@
"easy": {
"accuracy": 90.9,
"cost": 0.0028,
- "robustness": 0
+ "robustness": 75.0
},
"medium": {
"accuracy": 47.8,
"cost": 0.0046,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 7.5,
"cost": 0.0059,
- "robustness": 0
+ "robustness": 80.0
},
"all": {
"accuracy": 57.7,
"cost": 0.0041,
- "robustness": 0
+ "robustness": 87.5
}
},
"subcategories": {
@@ -9004,22 +9004,22 @@
"easy": {
"accuracy": 89.3,
"cost": 0.0039,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 61.0,
"cost": 0.0082,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 6.1,
"cost": 0.0107,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 62.1,
"cost": 0.0068,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -9028,22 +9028,22 @@
"easy": {
"accuracy": 89.9,
"cost": 0.0021,
- "robustness": 0
+ "robustness": 60.0
},
"medium": {
"accuracy": 30.2,
"cost": 0.0027,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 16.0,
"cost": 0.003,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 53.3,
"cost": 0.0025,
- "robustness": 0
+ "robustness": 72.7
}
}
},
@@ -9052,22 +9052,22 @@
"easy": {
"accuracy": 96.6,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 66.7,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 5.4,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 55.8,
"cost": 0.0015,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -9078,22 +9078,22 @@
"easy": {
"accuracy": 95.0,
"cost": 0.0027,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 71.7,
"cost": 0.0029,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 25.6,
"cost": 0.0051,
- "robustness": 0
+ "robustness": 87.5
},
"all": {
"accuracy": 41.3,
"cost": 0.0045,
- "robustness": 0
+ "robustness": 86.2
}
},
"subcategories": {
@@ -9102,22 +9102,22 @@
"easy": {
"accuracy": 95.0,
"cost": 0.0027,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 71.7,
"cost": 0.0029,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 25.6,
"cost": 0.0051,
- "robustness": 0
+ "robustness": 87.5
},
"all": {
"accuracy": 41.3,
"cost": 0.0045,
- "robustness": 0
+ "robustness": 86.2
}
}
}
@@ -9128,22 +9128,22 @@
"easy": {
"accuracy": 93.6,
"cost": 0.0029,
- "robustness": 0
+ "robustness": 95.7
},
"medium": {
"accuracy": 57.6,
"cost": 0.0034,
- "robustness": 0
+ "robustness": 83.3
},
"hard": {
"accuracy": 6.7,
"cost": 0.0034,
- "robustness": 0
+ "robustness": 90.0
},
"all": {
"accuracy": 66.0,
"cost": 0.0031,
- "robustness": 0
+ "robustness": 92.3
}
},
"subcategories": {
@@ -9152,12 +9152,12 @@
"easy": {
"accuracy": 89.9,
"cost": 0.001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 58.6,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 25.0,
@@ -9167,7 +9167,7 @@
"all": {
"accuracy": 78.7,
"cost": 0.0011,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -9176,22 +9176,22 @@
"easy": {
"accuracy": 94.6,
"cost": 0.004,
- "robustness": 0
+ "robustness": 90.9
},
"medium": {
"accuracy": 57.6,
"cost": 0.004,
- "robustness": 0
+ "robustness": 80.0
},
"hard": {
"accuracy": 5.1,
"cost": 0.0036,
- "robustness": 0
+ "robustness": 90.0
},
"all": {
"accuracy": 59.8,
"cost": 0.0039,
- "robustness": 0
+ "robustness": 88.5
}
}
},
@@ -9200,7 +9200,7 @@
"easy": {
"accuracy": 97.6,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 55.6,
@@ -9215,7 +9215,7 @@
"all": {
"accuracy": 88.5,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -9228,22 +9228,22 @@
"easy": {
"accuracy": 93.5,
"cost": 0.0036,
- "robustness": 0
+ "robustness": 84.8
},
"medium": {
"accuracy": 45.5,
"cost": 0.0057,
- "robustness": 0
+ "robustness": 85.0
},
"hard": {
"accuracy": 17.0,
"cost": 0.0064,
- "robustness": 0
+ "robustness": 66.1
},
"all": {
"accuracy": 61.6,
"cost": 0.0048,
- "robustness": 0
+ "robustness": 80.0
}
},
"categories": {
@@ -9252,22 +9252,22 @@
"easy": {
"accuracy": 94.8,
"cost": 0.0032,
- "robustness": 0
+ "robustness": 96.2
},
"medium": {
"accuracy": 43.7,
"cost": 0.0081,
- "robustness": 0
+ "robustness": 90.5
},
"hard": {
"accuracy": 1.6,
"cost": 0.0142,
- "robustness": 0
+ "robustness": 80.0
},
"all": {
"accuracy": 61.6,
"cost": 0.0068,
- "robustness": 0
+ "robustness": 90.3
}
},
"subcategories": {
@@ -9276,22 +9276,22 @@
"easy": {
"accuracy": 94.2,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 92.3
},
"medium": {
"accuracy": 47.1,
"cost": 0.0021,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0023,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 75.5,
"cost": 0.0023,
- "robustness": 0
+ "robustness": 94.7
}
}
},
@@ -9300,22 +9300,22 @@
"easy": {
"accuracy": 95.2,
"cost": 0.0037,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 43.0,
"cost": 0.0095,
- "robustness": 0
+ "robustness": 88.2
},
"hard": {
"accuracy": 1.9,
"cost": 0.0163,
- "robustness": 0
+ "robustness": 76.9
},
"all": {
"accuracy": 56.2,
"cost": 0.0085,
- "robustness": 0
+ "robustness": 88.4
}
}
}
@@ -9326,22 +9326,22 @@
"easy": {
"accuracy": 90.0,
"cost": 0.0035,
- "robustness": 0
+ "robustness": 96.3
},
"medium": {
"accuracy": 39.3,
"cost": 0.0039,
- "robustness": 0
+ "robustness": 60.0
},
"hard": {
"accuracy": 3.7,
"cost": 0.0025,
- "robustness": 0
+ "robustness": 57.1
},
"all": {
"accuracy": 64.7,
"cost": 0.0035,
- "robustness": 0
+ "robustness": 81.8
}
},
"subcategories": {
@@ -9350,22 +9350,22 @@
"easy": {
"accuracy": 84.3,
"cost": 0.0032,
- "robustness": 0
+ "robustness": 92.9
},
"medium": {
"accuracy": 30.6,
"cost": 0.004,
- "robustness": 0
+ "robustness": 80.0
},
"hard": {
"accuracy": 7.7,
"cost": 0.0034,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 64.7,
"cost": 0.0035,
- "robustness": 0
+ "robustness": 90.5
}
}
},
@@ -9374,22 +9374,22 @@
"easy": {
"accuracy": 97.3,
"cost": 0.0028,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 40.0,
"cost": 0.0033,
- "robustness": 0
+ "robustness": 0.0
},
"hard": {
"accuracy": 4.4,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 33.3
},
"all": {
"accuracy": 44.3,
"cost": 0.0026,
- "robustness": 0
+ "robustness": 50.0
}
}
},
@@ -9398,7 +9398,7 @@
"easy": {
"accuracy": 95.0,
"cost": 0.004,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 58.3,
@@ -9408,12 +9408,12 @@
"hard": {
"accuracy": 0.0,
"cost": 0.0035,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 76.5,
"cost": 0.0039,
- "robustness": 0
+ "robustness": 87.5
}
}
},
@@ -9422,12 +9422,12 @@
"easy": {
"accuracy": 92.3,
"cost": 0.0036,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 34.2,
"cost": 0.0041,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 0.0,
@@ -9437,7 +9437,7 @@
"all": {
"accuracy": 67.6,
"cost": 0.0038,
- "robustness": 0
+ "robustness": 85.7
}
}
}
@@ -9448,22 +9448,22 @@
"easy": {
"accuracy": 93.3,
"cost": 0.0043,
- "robustness": 0
+ "robustness": 94.4
},
"medium": {
"accuracy": 46.6,
"cost": 0.0063,
- "robustness": 0
+ "robustness": 83.3
},
"hard": {
"accuracy": 3.4,
"cost": 0.0067,
- "robustness": 0
+ "robustness": 85.7
},
"all": {
"accuracy": 57.3,
"cost": 0.0055,
- "robustness": 0
+ "robustness": 89.2
}
},
"subcategories": {
@@ -9472,22 +9472,22 @@
"easy": {
"accuracy": 91.8,
"cost": 0.0048,
- "robustness": 0
+ "robustness": 91.7
},
"medium": {
"accuracy": 46.0,
"cost": 0.0072,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 5.0,
"cost": 0.01,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 58.0,
"cost": 0.0067,
- "robustness": 0
+ "robustness": 90.5
}
}
},
@@ -9496,22 +9496,22 @@
"easy": {
"accuracy": 93.8,
"cost": 0.0048,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 49.2,
"cost": 0.006,
- "robustness": 0
+ "robustness": 75.0
},
"hard": {
"accuracy": 3.3,
"cost": 0.0052,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 58.9,
"cost": 0.0053,
- "robustness": 0
+ "robustness": 88.9
}
}
},
@@ -9525,17 +9525,17 @@
"medium": {
"accuracy": 55.6,
"cost": 0.0012,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0011,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 23.6,
"cost": 0.0012,
- "robustness": 0
+ "robustness": 66.7
}
}
},
@@ -9544,7 +9544,7 @@
"easy": {
"accuracy": 97.9,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 22.2,
@@ -9554,12 +9554,12 @@
"hard": {
"accuracy": 0.0,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 78.7,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -9570,22 +9570,22 @@
"easy": {
"accuracy": 88.0,
"cost": 0.0023,
- "robustness": 0
+ "robustness": 72.7
},
"medium": {
"accuracy": 46.1,
"cost": 0.0036,
- "robustness": 0
+ "robustness": 88.9
},
"hard": {
"accuracy": 49.5,
"cost": 0.0038,
- "robustness": 0
+ "robustness": 56.0
},
"all": {
"accuracy": 60.2,
"cost": 0.0033,
- "robustness": 0
+ "robustness": 66.7
}
},
"subcategories": {
@@ -9594,22 +9594,22 @@
"easy": {
"accuracy": 88.0,
"cost": 0.0023,
- "robustness": 0
+ "robustness": 72.7
},
"medium": {
"accuracy": 46.1,
"cost": 0.0036,
- "robustness": 0
+ "robustness": 88.9
},
"hard": {
"accuracy": 49.5,
"cost": 0.0038,
- "robustness": 0
+ "robustness": 56.0
},
"all": {
"accuracy": 60.2,
"cost": 0.0033,
- "robustness": 0
+ "robustness": 66.7
}
}
}
@@ -9620,22 +9620,22 @@
"easy": {
"accuracy": 95.9,
"cost": 0.0036,
- "robustness": 0
+ "robustness": 79.5
},
"medium": {
"accuracy": 44.7,
"cost": 0.006,
- "robustness": 0
+ "robustness": 81.0
},
"hard": {
"accuracy": 3.1,
"cost": 0.0074,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 67.2,
"cost": 0.0048,
- "robustness": 0
+ "robustness": 78.3
}
},
"subcategories": {
@@ -9644,22 +9644,22 @@
"easy": {
"accuracy": 92.4,
"cost": 0.0042,
- "robustness": 0
+ "robustness": 81.8
},
"medium": {
"accuracy": 44.4,
"cost": 0.0073,
- "robustness": 0
+ "robustness": 81.8
},
"hard": {
"accuracy": 3.4,
"cost": 0.0131,
- "robustness": 0
+ "robustness": 60.0
},
"all": {
"accuracy": 59.2,
"cost": 0.0068,
- "robustness": 0
+ "robustness": 77.8
}
}
},
@@ -9668,22 +9668,22 @@
"easy": {
"accuracy": 97.7,
"cost": 0.0027,
- "robustness": 0
+ "robustness": 75.0
},
"medium": {
"accuracy": 37.3,
"cost": 0.0027,
- "robustness": 0
+ "robustness": 66.7
},
"hard": {
"accuracy": 15.4,
"cost": 0.0023,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 83.6,
"cost": 0.0027,
- "robustness": 0
+ "robustness": 73.3
}
}
},
@@ -9692,7 +9692,7 @@
"easy": {
"accuracy": 97.4,
"cost": 0.0046,
- "robustness": 0
+ "robustness": 71.4
},
"medium": {
"accuracy": 29.4,
@@ -9707,7 +9707,7 @@
"all": {
"accuracy": 83.2,
"cost": 0.0048,
- "robustness": 0
+ "robustness": 71.4
}
}
},
@@ -9716,12 +9716,12 @@
"easy": {
"accuracy": 97.7,
"cost": 0.0043,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 55.9,
"cost": 0.0061,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -9731,7 +9731,7 @@
"all": {
"accuracy": 67.8,
"cost": 0.0051,
- "robustness": 0
+ "robustness": 80.0
}
}
},
@@ -9740,12 +9740,12 @@
"easy": {
"accuracy": 95.5,
"cost": 0.0034,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 54.8,
"cost": 0.0059,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -9755,7 +9755,7 @@
"all": {
"accuracy": 67.0,
"cost": 0.0046,
- "robustness": 0
+ "robustness": 80.0
}
}
},
@@ -9764,7 +9764,7 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 60.0,
@@ -9779,7 +9779,7 @@
"all": {
"accuracy": 92.0,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -9788,7 +9788,7 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0017,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 40.0,
@@ -9798,12 +9798,12 @@
"hard": {
"accuracy": 1.7,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 36.0,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 87.5
}
}
}
@@ -9814,22 +9814,22 @@
"easy": {
"accuracy": 91.6,
"cost": 0.0043,
- "robustness": 0
+ "robustness": 74.4
},
"medium": {
"accuracy": 38.1,
"cost": 0.0058,
- "robustness": 0
+ "robustness": 90.5
},
"hard": {
"accuracy": 3.8,
"cost": 0.0052,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 65.7,
"cost": 0.0049,
- "robustness": 0
+ "robustness": 81.7
}
},
"subcategories": {
@@ -9838,22 +9838,22 @@
"easy": {
"accuracy": 90.5,
"cost": 0.0046,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 38.3,
"cost": 0.0086,
- "robustness": 0
+ "robustness": 84.6
},
"hard": {
"accuracy": 0.0,
"cost": 0.0081,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 53.1,
"cost": 0.007,
- "robustness": 0
+ "robustness": 89.5
}
}
},
@@ -9862,22 +9862,22 @@
"easy": {
"accuracy": 91.4,
"cost": 0.0045,
- "robustness": 0
+ "robustness": 70.6
},
"medium": {
"accuracy": 37.6,
"cost": 0.004,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 5.3,
"cost": 0.0042,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 68.4,
"cost": 0.0043,
- "robustness": 0
+ "robustness": 78.7
}
}
},
@@ -9886,12 +9886,12 @@
"easy": {
"accuracy": 96.7,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 75.0
},
"medium": {
"accuracy": 45.5,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -9901,7 +9901,7 @@
"all": {
"accuracy": 86.3,
"cost": 0.0022,
- "robustness": 0
+ "robustness": 80.0
}
}
}
@@ -9912,22 +9912,22 @@
"easy": {
"accuracy": 94.0,
"cost": 0.0031,
- "robustness": 0
+ "robustness": 87.5
},
"medium": {
"accuracy": 49.4,
"cost": 0.0048,
- "robustness": 0
+ "robustness": 90.9
},
"hard": {
"accuracy": 7.5,
"cost": 0.0059,
- "robustness": 0
+ "robustness": 60.0
},
"all": {
"accuracy": 59.6,
"cost": 0.0043,
- "robustness": 0
+ "robustness": 83.3
}
},
"subcategories": {
@@ -9936,22 +9936,22 @@
"easy": {
"accuracy": 91.4,
"cost": 0.004,
- "robustness": 0
+ "robustness": 50.0
},
"medium": {
"accuracy": 63.0,
"cost": 0.0083,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 6.1,
"cost": 0.0107,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 63.7,
"cost": 0.0069,
- "robustness": 0
+ "robustness": 87.5
}
}
},
@@ -9960,22 +9960,22 @@
"easy": {
"accuracy": 93.9,
"cost": 0.0027,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 30.2,
"cost": 0.0029,
- "robustness": 0
+ "robustness": 80.0
},
"hard": {
"accuracy": 16.0,
"cost": 0.0033,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 55.0,
"cost": 0.0029,
- "robustness": 0
+ "robustness": 90.9
}
}
},
@@ -9984,22 +9984,22 @@
"easy": {
"accuracy": 100.0,
"cost": 0.0018,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 71.8,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 5.4,
"cost": 0.0014,
- "robustness": 0
+ "robustness": 0.0
},
"all": {
"accuracy": 58.4,
"cost": 0.0016,
- "robustness": 0
+ "robustness": 60.0
}
}
}
@@ -10010,22 +10010,22 @@
"easy": {
"accuracy": 95.4,
"cost": 0.0028,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 65.3,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 24.7,
"cost": 0.0057,
- "robustness": 0
+ "robustness": 45.8
},
"all": {
"accuracy": 40.1,
"cost": 0.0049,
- "robustness": 0
+ "robustness": 48.3
}
},
"subcategories": {
@@ -10034,22 +10034,22 @@
"easy": {
"accuracy": 95.4,
"cost": 0.0028,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 65.3,
"cost": 0.0024,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 24.7,
"cost": 0.0057,
- "robustness": 0
+ "robustness": 45.8
},
"all": {
"accuracy": 40.1,
"cost": 0.0049,
- "robustness": 0
+ "robustness": 48.3
}
}
}
@@ -10060,22 +10060,22 @@
"easy": {
"accuracy": 96.0,
"cost": 0.0032,
- "robustness": 0
+ "robustness": 87.0
},
"medium": {
"accuracy": 62.1,
"cost": 0.0038,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 6.7,
"cost": 0.0036,
- "robustness": 0
+ "robustness": 90.0
},
"all": {
"accuracy": 68.4,
"cost": 0.0035,
- "robustness": 0
+ "robustness": 89.7
}
},
"subcategories": {
@@ -10084,12 +10084,12 @@
"easy": {
"accuracy": 95.4,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 58.6,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 25.0,
@@ -10099,7 +10099,7 @@
"all": {
"accuracy": 82.7,
"cost": 0.0013,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -10108,22 +10108,22 @@
"easy": {
"accuracy": 96.0,
"cost": 0.0044,
- "robustness": 0
+ "robustness": 72.7
},
"medium": {
"accuracy": 62.6,
"cost": 0.0045,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 5.1,
"cost": 0.0038,
- "robustness": 0
+ "robustness": 90.0
},
"all": {
"accuracy": 61.8,
"cost": 0.0043,
- "robustness": 0
+ "robustness": 84.6
}
}
},
@@ -10132,7 +10132,7 @@
"easy": {
"accuracy": 97.6,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 66.7,
@@ -10147,7 +10147,7 @@
"all": {
"accuracy": 90.4,
"cost": 0.0019,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -10160,22 +10160,22 @@
"easy": {
"accuracy": 54.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 88.4
},
"medium": {
"accuracy": 14.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 87.6
},
"hard": {
"accuracy": 8.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 77.1
},
"all": {
"accuracy": 32.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 85.2
}
},
"categories": {
@@ -10184,22 +10184,22 @@
"easy": {
"accuracy": 55.3,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 11.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 76.2
},
"hard": {
"accuracy": 1.2,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 86.7
},
"all": {
"accuracy": 31.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 88.7
}
},
"subcategories": {
@@ -10208,22 +10208,22 @@
"easy": {
"accuracy": 58.2,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 16.5,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 8.3,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 44.7,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -10232,22 +10232,22 @@
"easy": {
"accuracy": 53.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 9.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 70.6
},
"hard": {
"accuracy": 0.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 84.6
},
"all": {
"accuracy": 25.9,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 83.7
}
}
}
@@ -10258,22 +10258,22 @@
"easy": {
"accuracy": 52.9,
"cost": 0.0,
- "robustness": 0
+ "robustness": 92.6
},
"medium": {
"accuracy": 19.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 90.0
},
"hard": {
"accuracy": 7.4,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 37.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 93.2
}
},
"subcategories": {
@@ -10282,22 +10282,22 @@
"easy": {
"accuracy": 60.0,
"cost": 0.0,
- "robustness": 0
+ "robustness": 85.7
},
"medium": {
"accuracy": 30.6,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 7.7,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 48.8,
"cost": 0.0,
- "robustness": 0
+ "robustness": 90.5
}
}
},
@@ -10306,22 +10306,22 @@
"easy": {
"accuracy": 32.4,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 10.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 2.2,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 13.9,
"cost": 0.0,
- "robustness": 0
+ "robustness": 87.5
}
}
},
@@ -10330,7 +10330,7 @@
"easy": {
"accuracy": 49.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 18.8,
@@ -10340,12 +10340,12 @@
"hard": {
"accuracy": 11.1,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 38.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -10354,12 +10354,12 @@
"easy": {
"accuracy": 50.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 2.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 40.0,
@@ -10369,7 +10369,7 @@
"all": {
"accuracy": 33.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -10380,22 +10380,22 @@
"easy": {
"accuracy": 46.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 11.1,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 75.0
},
"hard": {
"accuracy": 2.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 85.7
},
"all": {
"accuracy": 24.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 73.0
}
},
"subcategories": {
@@ -10404,22 +10404,22 @@
"easy": {
"accuracy": 47.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 75.0
},
"medium": {
"accuracy": 10.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 83.3
},
"hard": {
"accuracy": 3.8,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 24.9,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 76.2
}
}
},
@@ -10428,22 +10428,22 @@
"easy": {
"accuracy": 29.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 0.0
},
"medium": {
"accuracy": 6.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 3.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 15.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 44.4
}
}
},
@@ -10457,17 +10457,17 @@
"medium": {
"accuracy": 5.6,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 3.6,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -10476,7 +10476,7 @@
"easy": {
"accuracy": 68.1,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 66.7,
@@ -10486,12 +10486,12 @@
"hard": {
"accuracy": 0.0,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 62.3,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -10502,22 +10502,22 @@
"easy": {
"accuracy": 47.8,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 24.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 24.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 60.0
},
"all": {
"accuracy": 31.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 77.8
}
},
"subcategories": {
@@ -10526,22 +10526,22 @@
"easy": {
"accuracy": 47.8,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 24.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 24.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 60.0
},
"all": {
"accuracy": 31.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 77.8
}
}
}
@@ -10552,22 +10552,22 @@
"easy": {
"accuracy": 57.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 89.7
},
"medium": {
"accuracy": 12.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 90.5
},
"hard": {
"accuracy": 4.1,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 88.9
},
"all": {
"accuracy": 35.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 89.9
}
},
"subcategories": {
@@ -10576,22 +10576,22 @@
"easy": {
"accuracy": 50.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 90.9
},
"medium": {
"accuracy": 11.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 90.9
},
"hard": {
"accuracy": 2.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 80.0
},
"all": {
"accuracy": 27.1,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 88.9
}
}
},
@@ -10600,22 +10600,22 @@
"easy": {
"accuracy": 67.4,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 8.5,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 15.4,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 54.8,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -10624,7 +10624,7 @@
"easy": {
"accuracy": 61.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 71.4
},
"medium": {
"accuracy": 23.5,
@@ -10639,7 +10639,7 @@
"all": {
"accuracy": 53.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 71.4
}
}
},
@@ -10648,12 +10648,12 @@
"easy": {
"accuracy": 44.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 23.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 50.0
},
"hard": {
"accuracy": 15.4,
@@ -10663,7 +10663,7 @@
"all": {
"accuracy": 32.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 80.0
}
}
},
@@ -10672,12 +10672,12 @@
"easy": {
"accuracy": 40.9,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
},
"medium": {
"accuracy": 9.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 9.1,
@@ -10687,7 +10687,7 @@
"all": {
"accuracy": 23.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 80.0
}
}
},
@@ -10696,7 +10696,7 @@
"easy": {
"accuracy": 55.0,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 0.0,
@@ -10711,7 +10711,7 @@
"all": {
"accuracy": 44.0,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -10720,7 +10720,7 @@
"easy": {
"accuracy": 50.0,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 8.0,
@@ -10730,12 +10730,12 @@
"hard": {
"accuracy": 0.0,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 14.9,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -10746,22 +10746,22 @@
"easy": {
"accuracy": 57.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 76.7
},
"medium": {
"accuracy": 18.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 90.5
},
"hard": {
"accuracy": 1.9,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 85.7
},
"all": {
"accuracy": 39.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 81.7
}
},
"subcategories": {
@@ -10770,22 +10770,22 @@
"easy": {
"accuracy": 49.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 15.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 26.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -10794,22 +10794,22 @@
"easy": {
"accuracy": 57.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 70.6
},
"medium": {
"accuracy": 20.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 71.4
},
"hard": {
"accuracy": 2.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 83.3
},
"all": {
"accuracy": 42.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 72.3
}
}
},
@@ -10818,12 +10818,12 @@
"easy": {
"accuracy": 71.7,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 27.3,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -10833,7 +10833,7 @@
"all": {
"accuracy": 63.0,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -10844,22 +10844,22 @@
"easy": {
"accuracy": 52.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 15.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 80.0
},
"all": {
"accuracy": 28.1,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 95.8
}
},
"subcategories": {
@@ -10868,22 +10868,22 @@
"easy": {
"accuracy": 43.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 11.0,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0002,
- "robustness": 0
+ "robustness": 50.0
},
"all": {
"accuracy": 23.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 87.5
}
}
},
@@ -10892,22 +10892,22 @@
"easy": {
"accuracy": 59.6,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 22.4,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 35.4,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -10916,22 +10916,22 @@
"easy": {
"accuracy": 62.7,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 5.1,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 1.8,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"all": {
"accuracy": 26.0,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
}
}
}
@@ -10942,22 +10942,22 @@
"easy": {
"accuracy": 57.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 9.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 12.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 19.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 72.4
}
},
"subcategories": {
@@ -10966,22 +10966,22 @@
"easy": {
"accuracy": 57.3,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 9.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 12.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 66.7
},
"all": {
"accuracy": 19.4,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 72.4
}
}
}
@@ -10992,22 +10992,22 @@
"easy": {
"accuracy": 51.6,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 95.7
},
"medium": {
"accuracy": 10.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 83.3
},
"hard": {
"accuracy": 2.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 90.0
},
"all": {
"accuracy": 30.7,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 92.3
}
},
"subcategories": {
@@ -11016,12 +11016,12 @@
"easy": {
"accuracy": 60.6,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 6.9,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"hard": {
"accuracy": 0.0,
@@ -11031,7 +11031,7 @@
"all": {
"accuracy": 45.3,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
}
}
},
@@ -11040,22 +11040,22 @@
"easy": {
"accuracy": 48.9,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 90.9
},
"medium": {
"accuracy": 10.8,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 80.0
},
"hard": {
"accuracy": 2.2,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 90.0
},
"all": {
"accuracy": 25.5,
"cost": 0.0001,
- "robustness": 0
+ "robustness": 88.5
}
}
},
@@ -11064,7 +11064,7 @@
"easy": {
"accuracy": 42.9,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
},
"medium": {
"accuracy": 11.1,
@@ -11079,7 +11079,7 @@
"all": {
"accuracy": 38.5,
"cost": 0.0,
- "robustness": 0
+ "robustness": 100.0
}
}
}
diff --git a/src/data/routerMetrics/leaderboard.json b/src/data/routerMetrics/leaderboard.json
index bca1080..5324fd0 100644
--- a/src/data/routerMetrics/leaderboard.json
+++ b/src/data/routerMetrics/leaderboard.json
@@ -5,7 +5,7 @@
"Optimal Selection Score": 39.84,
"Optimal Cost Score": 72.998,
"Optimal Acc. Score": 49.05,
- "Robustness Score": 97.6,
+ "Robustness Score": 85.24,
"Latency Score": 10.7527,
"Accuracy": 32.01,
"Cost per 1k": 0.07
@@ -16,7 +16,7 @@
"Optimal Selection Score": 22.52,
"Optimal Cost Score": 46.322,
"Optimal Acc. Score": 81.96,
- "Robustness Score": null,
+ "Robustness Score": 54.07,
"Latency Score": null,
"Accuracy": 68.09,
"Cost per 1k": 0.54
@@ -27,7 +27,7 @@
"Optimal Selection Score": 2.68,
"Optimal Cost Score": 6.7697,
"Optimal Acc. Score": 78.63,
- "Robustness Score": 93.6,
+ "Robustness Score": 89.05,
"Latency Score": 1.4993,
"Accuracy": 67.21,
"Cost per 1k": 2.06
@@ -49,7 +49,7 @@
"Optimal Selection Score": 4.73,
"Optimal Cost Score": 38.3347,
"Optimal Acc. Score": 74.25,
- "Robustness Score": 97.5,
+ "Robustness Score": 94.29,
"Latency Score": 2.6954,
"Accuracy": 57.0,
"Cost per 1k": 0.34
@@ -60,7 +60,7 @@
"Optimal Selection Score": 3.44,
"Optimal Cost Score": 19.6178,
"Optimal Acc. Score": 78.18,
- "Robustness Score": 94.5,
+ "Robustness Score": 61.19,
"Latency Score": 27.027,
"Accuracy": 66.88,
"Cost per 1k": 0.15
@@ -71,7 +71,7 @@
"Optimal Selection Score": 3.83,
"Optimal Cost Score": 14.039,
"Optimal Acc. Score": 77.88,
- "Robustness Score": 44.5,
+ "Robustness Score": 49.29,
"Latency Score": 10.4167,
"Accuracy": 66.34,
"Cost per 1k": 0.21
@@ -82,7 +82,7 @@
"Optimal Selection Score": 1.55,
"Optimal Cost Score": 2.1367,
"Optimal Acc. Score": 76.81,
- "Robustness Score": null,
+ "Robustness Score": 55.91,
"Latency Score": null,
"Accuracy": 60.83,
"Cost per 1k": 4.1
@@ -93,7 +93,7 @@
"Optimal Selection Score": 99.72,
"Optimal Cost Score": 99.6314,
"Optimal Acc. Score": 68.76,
- "Robustness Score": 99.8,
+ "Robustness Score": 100.00,
"Latency Score": 0.4016,
"Accuracy": 47.04,
"Cost per 1k": 0.27
@@ -104,7 +104,7 @@
"Optimal Selection Score": 13.09,
"Optimal Cost Score": 25.4887,
"Optimal Acc. Score": 78.77,
- "Robustness Score": 51.3,
+ "Robustness Score": 83.33,
"Latency Score": 1.328,
"Accuracy": 58.69,
"Cost per 1k": 4.27
@@ -115,7 +115,7 @@
"Optimal Selection Score": 13.39,
"Optimal Cost Score": 24.4499,
"Optimal Acc. Score": 83.32,
- "Robustness Score": 96.9,
+ "Robustness Score": 80.00,
"Latency Score": 90.9091,
"Accuracy": 61.62,
"Cost per 1k": 4.83
@@ -126,7 +126,7 @@
"Optimal Selection Score": 4.79,
"Optimal Cost Score": 12.5426,
"Optimal Acc. Score": 79.33,
- "Robustness Score": 100.0,
+ "Robustness Score": 35.00,
"Latency Score": 0.1863,
"Accuracy": 67.28,
"Cost per 1k": 1.67
diff --git a/src/pages/LeaderboardPage.tsx b/src/pages/LeaderboardPage.tsx
index 932e06f..e1d6bf0 100644
--- a/src/pages/LeaderboardPage.tsx
+++ b/src/pages/LeaderboardPage.tsx
@@ -859,7 +859,7 @@ platforms.
synonyms, and typos) and measure how often the router selects the same model as
for the original query.
-
+
Higher values indicate greater stability under realistic input noise, reflecting
robust model selection.