` — domain string → numeric Q-ID.
+
+In `Pois.processOverture`, the website lookup acts as a fallback that fills in a wikidata ID when the feature doesn't have one natively, and then the existing QRank machinery takes over unchanged:
+
+```java
+String wikidata = sf.getString("wikidata"); // always null for Overture places theme
+if (wikidata == null) {
+ String website = /* first entry from sf.getList("websites") */;
+ wikidata = websiteQidDb.getQid(website); // domain → "Q2008530"
+}
+long qrank = (wikidata != null) ? qrankDb.get(wikidata) : 0;
+var qrankedZoom = QrankDb.assignZoom(qrankGrading, kind, qrank);
+```
+
+The full lookup chain is:
+
+```
+sf.websites[0] → domain → Q-ID (WebsiteQidDb)
+ Q-ID → qrank score (QrankDb)
+ qrank score → minZoom (assignZoom)
+```
+
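+No changes are needed to `QrankDb` or `assignZoom`: `WebsiteQidDb.getQid` returns the same `Q`-prefixed string form (e.g. `"Q2008530"`) that the existing `qrankDb.get(wikidata)` call already consumes, and `QrankDb.get(long)` already accepts a numeric ID. The lookup only runs for institution-level categories (airport, zoo, museum, etc.) with confidence ≥ 0.9, and a place only benefits if it also has a matching website entry *and* a QRank score; otherwise `qrank = 0` and behavior is identical to today.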
diff --git a/tiles/generate-wikidata-website-qid.sh b/tiles/generate-wikidata-website-qid.sh
new file mode 100755
index 000000000..7741557b5
--- /dev/null
+++ b/tiles/generate-wikidata-website-qid.sh
@@ -0,0 +1,43 @@
+#!/bin/bash -ex
+set -o pipefail # without this, a duckdb failure is masked by gzip's exit status and leaves an empty archive
+
+# Generate wikidata-website-qid.csv.gz -- a mapping from website domain to Wikidata QID.
+#
+# Fetches the complete Wikidata P856 (official website) table from QLever,
+# extracts lower-cased host names (minus a leading www.), disambiguates multiple QIDs per host
+# by taking the lowest Q-number, and writes a gzipped two-column CSV (header: domain,qid).
+#
+# Output: data/sources/wikidata-website-qid-YYYY-MM.csv.gz
+# Usage: ./generate-wikidata-website-qid.sh
+
+DATE=$(date +%Y-%m)
+OUTPUT="data/sources/wikidata-website-qid-${DATE}.csv.gz"
+TSV_TMP=$(mktemp /tmp/wikidata-p856-XXXXXX) && mv "$TSV_TMP" "${TSV_TMP}.tsv" && TSV_TMP="${TSV_TMP}.tsv"
+trap 'rm -f "$TSV_TMP"' EXIT # remove the download even when a later step aborts the script
+
+echo "Fetching Wikidata P856 (official website) from QLever..."
+curl --fail \
+ -H "Accept: text/tab-separated-values" \
+ --data-urlencode "query=PREFIX wdt: <http://www.wikidata.org/prop/direct/> SELECT ?item ?website WHERE { ?item wdt:P856 ?website }" \
+ --data-urlencode "send=2400000" \
+ "https://qlever.dev/api/wikidata" \
+ -o "$TSV_TMP"
+
+echo "Building domain -> QID mapping..."
+duckdb -c "
+COPY (
+ SELECT
+ regexp_extract(lower(\"?website\"), 'https?://(?:www\\.)?([^/>\?]+)', 1) AS domain,
+ arg_min(
+ regexp_extract(\"?item\", 'entity/(Q[0-9]+)', 1),
+ CAST(regexp_extract(\"?item\", 'Q([0-9]+)', 1) AS BIGINT)
+ ) AS qid
+ FROM read_csv('${TSV_TMP}', delim='\t', header=true, ignore_errors=true)
+ WHERE regexp_extract(\"?item\", 'entity/(Q[0-9]+)', 1) != ''
+ AND regexp_extract(lower(\"?website\"), 'https?://(?:www\\.)?([^/>\?]+)', 1) != ''
+ GROUP BY domain
+ ORDER BY domain
+) TO '/dev/stdout' (FORMAT CSV, HEADER true)
+" | gzip > "$OUTPUT"
+
+echo "Done: ${OUTPUT} ($(du -sh "$OUTPUT" | cut -f1))"
diff --git a/tiles/src/main/java/com/protomaps/basemap/Basemap.java b/tiles/src/main/java/com/protomaps/basemap/Basemap.java
index e31fb03b3..fb8549a0d 100644
--- a/tiles/src/main/java/com/protomaps/basemap/Basemap.java
+++ b/tiles/src/main/java/com/protomaps/basemap/Basemap.java
@@ -8,6 +8,7 @@
import com.onthegomap.planetiler.util.Downloader;
import com.protomaps.basemap.feature.CountryCoder;
import com.protomaps.basemap.feature.QrankDb;
+import com.protomaps.basemap.feature.WebsiteQidDb;
import com.protomaps.basemap.layers.Boundaries;
import com.protomaps.basemap.layers.Buildings;
import com.protomaps.basemap.layers.Earth;
@@ -38,7 +39,7 @@ public class Basemap extends ForwardingProfile {
private static final Logger LOGGER = LoggerFactory.getLogger(Basemap.class);
- public Basemap(QrankDb qrankDb, CountryCoder countryCoder, Clip clip,
+ public Basemap(QrankDb qrankDb, WebsiteQidDb websiteQidDb, CountryCoder countryCoder, Clip clip,
String layer) {
if (layer.isEmpty() || layer.equals(Boundaries.LAYER_NAME)) {
@@ -78,7 +79,7 @@ public Basemap(QrankDb qrankDb, CountryCoder countryCoder, Clip clip,
}
if (layer.isEmpty() || layer.equals(Pois.LAYER_NAME)) {
- var poi = new Pois(qrankDb);
+ var poi = new Pois(qrankDb, websiteQidDb);
registerHandler(poi);
registerSourceHandler("osm", poi::processOsm);
registerSourceHandler("pm:overture", poi::processOverture);
@@ -206,12 +207,12 @@ public static void main(String[] args) throws IOException {
}
private static void printVersion() {
- Basemap basemap = new Basemap(null, null, null, "");
+ Basemap basemap = new Basemap(null, null, null, null, "");
System.out.println(basemap.version());
}
private static void printHelp() {
- Basemap basemap = new Basemap(null, null, null, "");
+ Basemap basemap = new Basemap(null, null, null, null, "");
System.out.println(String.format("""
%s v%s
%s
@@ -317,6 +318,16 @@ static void run(Arguments args) throws IOException {
var qrankDb = QrankDb.fromCsv(qrankCsv);
+ Path websiteQidCsv = sourcesDir.resolve("wikidata-website-qid-2026-03.csv.gz"); // NOTE(review): snapshot month is hard-coded, while generate-wikidata-website-qid.sh names its output after the current month
+ if (!Files.exists(websiteQidCsv)) {
+ Downloader.create(planetiler.config())
+ .add("wikidata-website-qid",
+ "https://954.teczno.com/~migurski/tmp/wikidata-website-qid.csv.gz", // NOTE(review): undated file under a personal tmp/ path, presumably a placeholder; confirm a stable, versioned URL
+ websiteQidCsv)
+ .run();
+ }
+ var websiteQidDb = WebsiteQidDb.fromCsv(websiteQidCsv); // parses the gzipped domain,qid CSV into memory
+
if (!Files.exists(pgfEncodingZip)) {
Downloader.create(planetiler.config())
.add("pgf-encoding", "https://wipfli.github.io/pgf-encoding/pgf-encoding.zip", pgfEncodingZip)
@@ -375,7 +386,7 @@ static void run(Arguments args) throws IOException {
outputName = area;
}
- planetiler.setProfile(new Basemap(qrankDb, countryCoder, clip, layer))
+ planetiler.setProfile(new Basemap(qrankDb, websiteQidDb, countryCoder, clip, layer))
.setOutput(Path.of(outputName + ".pmtiles"))
.run();
}
diff --git a/tiles/src/main/java/com/protomaps/basemap/feature/WebsiteQidDb.java b/tiles/src/main/java/com/protomaps/basemap/feature/WebsiteQidDb.java
new file mode 100644
index 000000000..96c9586e7
--- /dev/null
+++ b/tiles/src/main/java/com/protomaps/basemap/feature/WebsiteQidDb.java
@@ -0,0 +1,79 @@
+package com.protomaps.basemap.feature;
+
+import java.io.*;
+import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * An in-memory mapping from website domain to Wikidata Q-ID, used to enrich Overture POIs (which lack native wikidata
+ * fields) for QRank-based zoom assignment.
+ *
+ * Parses a gzipped CSV with columns {@code domain,qid} into a HashMap for efficient lookup.
+ **/
+public final class WebsiteQidDb {
+
+ private final Map db;
+
+ public WebsiteQidDb(Map db) {
+ this.db = db;
+ }
+
+ /**
+ * Extracts the root domain from a URL and looks up the corresponding Wikidata Q-ID.
+ *
+ * @param url a full URL such as "https://www.iflyoak.com/flights"
+ * @return a Wikidata Q-ID string like "Q1165584", or null if not found
+ */
+ public String getQid(String url) {
+ if (url == null || url.isEmpty()) {
+ return null;
+ }
+ String domain = url;
+ // Strip protocol
+ if (domain.startsWith("https://")) {
+ domain = domain.substring("https://".length());
+ } else if (domain.startsWith("http://")) {
+ domain = domain.substring("http://".length());
+ }
+ // Strip www. prefix
+ if (domain.startsWith("www.")) {
+ domain = domain.substring("www.".length());
+ }
+ // Take portion up to first /
+ int slash = domain.indexOf('/');
+ if (slash >= 0) {
+ domain = domain.substring(0, slash);
+ }
+ Long id = db.get(domain);
+ return id != null ? "Q" + id : null;
+ }
+
+ public static WebsiteQidDb fromCsv(Path csvPath) throws IOException {
+ GZIPInputStream gzip = new GZIPInputStream(new FileInputStream(csvPath.toFile()));
+ try (BufferedReader br = new BufferedReader(new InputStreamReader(gzip))) {
+ String content;
+ Map db = new HashMap<>();
+ String header = br.readLine(); // header
+ assert (header.equals("domain,qid"));
+ while ((content = br.readLine()) != null) {
+ int lastComma = content.lastIndexOf(',');
+ if (lastComma < 0) {
+ continue;
+ }
+ String domain = content.substring(0, lastComma);
+ String qid = content.substring(lastComma + 1);
+ if (qid.startsWith("Q")) {
+ qid = qid.substring(1);
+ }
+ try {
+ db.put(domain, Long.parseLong(qid));
+ } catch (NumberFormatException e) {
+ // skip malformed rows
+ }
+ }
+ return new WebsiteQidDb(db);
+ }
+ }
+}
diff --git a/tiles/src/main/java/com/protomaps/basemap/layers/Pois.java b/tiles/src/main/java/com/protomaps/basemap/layers/Pois.java
index bc9d13af4..024687e0c 100644
--- a/tiles/src/main/java/com/protomaps/basemap/layers/Pois.java
+++ b/tiles/src/main/java/com/protomaps/basemap/layers/Pois.java
@@ -22,6 +22,7 @@
import com.protomaps.basemap.feature.FeatureId;
import com.protomaps.basemap.feature.Matcher;
import com.protomaps.basemap.feature.QrankDb;
+import com.protomaps.basemap.feature.WebsiteQidDb;
import com.protomaps.basemap.names.OsmNames;
import java.util.List;
import java.util.Map;
@@ -31,17 +32,22 @@ public class Pois implements ForwardingProfile.LayerPostProcessor {
private Map qrankGrading = Map.of(
"station", new int[][]{{10, 50000}, {12, 20000}, {13, 10000}},
- "aerodrome", new int[][]{{10, 50000}, {12, 20000}, {13, 5000}, {14, 2500}},
+ "aerodrome", new int[][]{{10, 200000}, {11, 100000}, {12, 20000}, {13, 5000}, {14, 2500}},
"park", new int[][]{{11, 20000}, {12, 10000}, {13, 5000}, {14, 2500}},
"peak", new int[][]{{11, 20000}, {12, 10000}, {13, 5000}, {14, 2500}},
- "attraction", new int[][]{{12, 40000}, {13, 20000}, {14, 10000}},
- "university", new int[][]{{12, 40000}, {13, 20000}, {14, 10000}}
+ "attraction", new int[][]{{12, 40000}, {13, 20000}, {14, 5000}},
+ "university", new int[][]{{12, 2000000}, {13, 500000}, {14, 10000}},
+ "college", new int[][]{{12, 2000000}, {13, 500000}, {14, 10000}},
+ "zoo", new int[][]{{12, 10000}, {13, 5000}, {14, 2500}},
+ "museum", new int[][]{{13, 20000}, {14, 5000}}
);
private QrankDb qrankDb;
+ private WebsiteQidDb websiteQidDb;
- public Pois(QrankDb qrankDb) {
+ public Pois(QrankDb qrankDb, WebsiteQidDb websiteQidDb) {
this.qrankDb = qrankDb;
+ this.websiteQidDb = websiteQidDb;
}
public static final String LAYER_NAME = "pois";
@@ -542,6 +548,37 @@ public void processOsm(SourceFeature sf, FeatureCollector features) {
outputFeature.setPointLabelGridSizeAndLimit(14, 8, 1);
}
+ // Categories where the Overture feature IS the institution itself, so its website
+ // reliably resolves to that institution's Wikidata QID rather than a corporate brand.
+ // Excluded: air_transport_facility_service, transportation_location, travel_service, etc.
+ // — these are sub-facilities or branded counters whose websites resolve to the airline/
+ // brand entity (e.g. jetblue.com → Q161086 JetBlue Airways), producing spuriously high
+ // QRank scores that push check-in counters and baggage claims to early zoom levels.
+ private static final java.util.Set<String> WEBSITE_QID_ELIGIBLE_CATEGORIES = java.util.Set.of(
+ "airport", // the airport itself, not airline counters inside it
+ "zoo", // institution-level feature
+ "museum", // institution-level feature
+ "art_museum", // institution-level feature
+ "college_university", // institution-level feature
+ "university", // institution-level feature
+ "park", // institution-level feature
+ "national_park", // institution-level feature
+ "aquarium", // institution-level feature
+ "botanical_garden", // institution-level feature
+ "stadium", // institution-level feature
+ "library" // institution-level feature
+ );
+
+ // Confidence floor for the website→QID lookup. Below it, features are often brand
+ // counters or services miscategorised as the institution (e.g. JetBlue at 0.32 tagged
+ // basic_category=airport), whereas real airports/zoos/etc. cluster at 0.90+.
+ private static final double WEBSITE_QID_MIN_CONFIDENCE = 0.9;
+
+ // True only for a known institution-level category at or above the confidence floor.
+ private static boolean isWebsiteQidEligible(String basicCategory, double confidence) {
+   // Null must be rejected before contains(): immutable sets throw on a null argument
+   if (basicCategory == null || !WEBSITE_QID_ELIGIBLE_CATEGORIES.contains(basicCategory)) {
+     return false;
+   }
+   return confidence >= WEBSITE_QID_MIN_CONFIDENCE;
+ }
+
public void processOverture(SourceFeature sf, FeatureCollector features) {
// Filter by type field - Overture transportation theme
if (!"places".equals(sf.getString("theme"))) {
@@ -569,8 +606,17 @@ public void processOverture(SourceFeature sf, FeatureCollector features) {
return;
}
- // QRank may override minZoom entirely
+ // QRank may override minZoom entirely.
+ // Website→QID lookup is restricted to categories where the feature IS the institution
+ // (airport, zoo, museum, etc.) — not sub-facilities or branded services where the
+ // website resolves to a corporate brand entity rather than the specific place.
String wikidata = sf.getString("wikidata");
+ if (wikidata == null && websiteQidDb != null && isWebsiteQidEligible(sf.getString("basic_category"), confidence)) {
+   // Only the first listed website is consulted; a missing, empty or null-headed list means "no website"
+   if (sf.getTag("websites") instanceof List<?> websites && !websites.isEmpty() && websites.get(0) != null) {
+     wikidata = websiteQidDb.getQid(websites.get(0).toString());
+   }
+ }
long qrank = (wikidata != null) ? qrankDb.get(wikidata) : 0;
var qrankedZoom = QrankDb.assignZoom(qrankGrading, kind, qrank);
diff --git a/tiles/src/test/java/com/protomaps/basemap/feature/WebsiteQidDbTest.java b/tiles/src/test/java/com/protomaps/basemap/feature/WebsiteQidDbTest.java
new file mode 100644
index 000000000..d28a870e0
--- /dev/null
+++ b/tiles/src/test/java/com/protomaps/basemap/feature/WebsiteQidDbTest.java
@@ -0,0 +1,73 @@
+package com.protomaps.basemap.feature;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.nio.file.Path;
+import java.util.Map;
+import org.junit.jupiter.api.Test;
+
+class WebsiteQidDbTest { // exercises WebsiteQidDb URL normalization and gzipped-CSV loading
+
+ private WebsiteQidDb dbFromFixture() throws IOException, URISyntaxException { // loads the gzipped fixture from the test classpath
+ var resource = getClass().getClassLoader().getResource("website_qid_fixture.csv.gz");
+ assertNotNull(resource, "Test fixture not found: website_qid_fixture.csv.gz");
+ return WebsiteQidDb.fromCsv(Path.of(resource.toURI()));
+ }
+
+ @Test
+ void parsesFixtureCsv() throws IOException, URISyntaxException { // two fixture domains resolve to their Q-IDs after parsing
+ var db = dbFromFixture();
+ assertEquals("Q2008530", db.getQid("http://www.oaklandzoo.org/"));
+ assertEquals("Q877714", db.getQid("https://museumca.org/"));
+ }
+
+ @Test
+ void stripsHttps() throws IOException, URISyntaxException { // the https:// scheme is removed before lookup
+ var db = dbFromFixture();
+ assertEquals("Q2008530", db.getQid("https://oaklandzoo.org/"));
+ }
+
+ @Test
+ void stripsHttp() throws IOException, URISyntaxException { // the http:// scheme is removed before lookup
+ var db = dbFromFixture();
+ assertEquals("Q2008530", db.getQid("http://oaklandzoo.org/"));
+ }
+
+ @Test
+ void stripsWww() throws IOException, URISyntaxException { // a leading www. is removed; same assertion as the first one in parsesFixtureCsv
+ var db = dbFromFixture();
+ assertEquals("Q2008530", db.getQid("http://www.oaklandzoo.org/"));
+ }
+
+ @Test
+ void stripsPath() throws IOException, URISyntaxException { // everything from the first slash after the host is ignored
+ var db = dbFromFixture();
+ assertEquals("Q877714", db.getQid("https://museumca.org/visit/hours"));
+ }
+
+ @Test
+ void missingDomainReturnsNull() throws IOException, URISyntaxException { // a domain the lookup does not resolve yields null rather than throwing
+ var db = dbFromFixture();
+ assertNull(db.getQid("https://example.com/"));
+ }
+
+ @Test
+ void nullUrlReturnsNull() { // null input short-circuits to null
+ var db = new WebsiteQidDb(Map.of());
+ assertNull(db.getQid(null));
+ }
+
+ @Test
+ void emptyUrlReturnsNull() { // empty input short-circuits to null
+ var db = new WebsiteQidDb(Map.of());
+ assertNull(db.getQid(""));
+ }
+
+ @Test
+ void inMemoryConstructor() { // a caller-supplied map works without any CSV; numeric IDs come back "Q"-prefixed
+ var db = new WebsiteQidDb(Map.of("iflyoak.com", 1165584L));
+ assertEquals("Q1165584", db.getQid("https://www.iflyoak.com/flights"));
+ }
+}
diff --git a/tiles/src/test/java/com/protomaps/basemap/layers/LayerTest.java b/tiles/src/test/java/com/protomaps/basemap/layers/LayerTest.java
index a376eb851..04a2fa09b 100644
--- a/tiles/src/test/java/com/protomaps/basemap/layers/LayerTest.java
+++ b/tiles/src/test/java/com/protomaps/basemap/layers/LayerTest.java
@@ -12,6 +12,7 @@
import com.protomaps.basemap.Basemap;
import com.protomaps.basemap.feature.CountryCoder;
import com.protomaps.basemap.feature.QrankDb;
+import com.protomaps.basemap.feature.WebsiteQidDb;
import java.util.List;
import java.util.Map;
import java.util.stream.StreamSupport;
@@ -26,9 +27,20 @@ abstract class LayerTest {
"{\"type\":\"FeatureCollection\",\"features\":[{\"type\":\"Feature\",\"properties\":{\"iso1A2\":\"US\",\"nameEn\":\"United States\"},\"geometry\":{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-124,47],[-124,25],[-71,25],[-71,47],[-124,47]]]]}}]}");
- final QrankDb qrankDb = new QrankDb(LongLongHashMap.from(new long[]{8888}, new long[]{100000}));
+ final QrankDb qrankDb = new QrankDb(LongLongHashMap.from(
+ new long[]{8888, 1165584, 2008530, 168756, 877714, 161086},
+ new long[]{100000, 140740, 12197, 1604223, 9227, 5000000}
+ ));
- final Basemap profile = new Basemap(qrankDb, countryCoder, null, "");
+ final WebsiteQidDb websiteQidDb = new WebsiteQidDb(Map.of(
+ "iflyoak.com", 1165584L, // Oakland Airport Q1165584
+ "oaklandzoo.org", 2008530L, // Oakland Zoo Q2008530
+ "berkeley.edu", 168756L, // UC Berkeley Q168756
+ "museumca.org", 877714L, // OMCA Q877714
+ "jetblue.com", 161086L // JetBlue Airways Q161086 (airline brand, not a place)
+ ));
+
+ final Basemap profile = new Basemap(qrankDb, websiteQidDb, countryCoder, null, "");
static void assertFeatures(int zoom, List