1+ package examples ;
2+
3+ import jnr .ffi .Pointer ;
4+ import jnr .ffi .Memory ;
5+ import jnr .ffi .Runtime ;
6+
7+ import java .io .*;
8+ import java .util .*;
9+
10+ import static functions .functions .*;
11+
12+ /**
13+ * Demonstrates K-means spatial clustering on populated places.
14+ *
15+ * K-means clustering groups geographic points into K clusters based on proximity.
16+ * Each cluster contains points that are close to each other spatially.
17+ *
18+ * This program:
19+ * 1. Reads populated places from CSV (name, population, location)
20+ * 2. Applies K-means clustering to group places into 10 clusters
21+ * 3. Writes results with cluster assignments to output CSV
22+ *
23+ * SQL Equivalent in PostGIS:
24+ * <pre>
25+ * SELECT name, pop_max, geom,
26+ * ST_ClusterKMeans(geom, 10) OVER () AS cluster
27+ * FROM popplaces;
28+ * </pre>
29+ * https://postgis.net/docs/ST_ClusterKMeans.html
30+ *
31+ * Use Cases:
32+ * - Grouping cities by geographic region
33+ * - Optimizing location-based services
34+ *
35+ * Data Source: Natural Earth 1:10M Populated Places
36+ * https://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-populated-places/
37+ */
38+ public class N16_Clustering_KMeans {
39+
// Configuration

// Upper bound on the number of places collected by readPlaces().
private static final int MAX_ROWS = 50000;
// readPlaces() prints one progress marker each time this many places have been loaded.
private static final int BATCH_SIZE = 1000;
// Number of clusters requested from geo_cluster_kmeans and iterated over when displaying samples.
private static final int NUM_CLUSTERS = 10;
// Maximum number of places printed per cluster by displaySamplePlaces().
private static final int NUM_SAMPLES = 5;
// Directory holding the CSV files; a relative path.
private static final String DATA_DIR = "src/main/java/examples/data/";
// CSV read by readPlaces(); lines are parsed as name,pop_max,geom after a header line.
private static final String INPUT_FILE = DATA_DIR + "popplaces.csv";
// CSV written by writeResults(); same columns plus the assigned cluster.
private static final String OUTPUT_FILE = DATA_DIR + "popplaces_clustered.csv";
48+
49+ /**
50+ * Record representing a populated place
51+ */
52+ static class PopulatedPlace {
53+ String name ;
54+ int population ;
55+ Pointer geom ;
56+ int cluster ;
57+
58+ PopulatedPlace (String name , int population , Pointer geom ) {
59+ this .name = name ;
60+ this .population = population ;
61+ this .geom = geom ;
62+ this .cluster = -1 ;
63+ }
64+ }
65+
66+ /**
67+ * Read populated places from CSV file
68+ */
69+ private static List <PopulatedPlace > readPlaces () throws IOException {
70+ List <PopulatedPlace > places = new ArrayList <>();
71+ int nullCount = 0 ;
72+
73+ System .out .println ("\n " + "=" .repeat (60 ));
74+ System .out .println ("Reading populated places from CSV" );
75+ System .out .println ("=" .repeat (60 ));
76+ System .out .println ("Reading (one '*' every " + BATCH_SIZE + " records):" );
77+ System .out .print (" " );
78+
79+ try (BufferedReader br = new BufferedReader (new FileReader (INPUT_FILE ))) {
80+ // Skip header
81+ String header = br .readLine ();
82+
83+ String line ;
84+ int lineNumber = 0 ;
85+
86+ while ((line = br .readLine ()) != null && places .size () < MAX_ROWS ) {
87+ lineNumber ++;
88+
89+ // Parse CSV: name,pop_max,geom
90+ String [] parts = line .split ("," , 3 );
91+
92+ if (parts .length != 3 ) {
93+ System .out .println ("Record nb. " + lineNumber + " with missing values ignored" );
94+ nullCount ++;
95+ continue ;
96+ }
97+
98+ try {
99+ String name = parts [0 ].trim ();
100+ int population = Integer .parseInt (parts [1 ].trim ());
101+ String geomWKT = parts [2 ].trim ();
102+
103+ // Parse geometry
104+ Pointer geom = geom_in (geomWKT , -1 );
105+
106+ if (geom != null ) {
107+ places .add (new PopulatedPlace (name , population , geom ));
108+
109+ // Print progress marker
110+ if (places .size () % BATCH_SIZE == 0 ) {
111+ System .out .print ("*" );
112+ System .out .flush ();
113+ }
114+ } else {
115+ nullCount ++;
116+ }
117+ } catch (NumberFormatException e ) {
118+ nullCount ++;
119+ }
120+ }
121+ }
122+
123+ System .out .println ("\n \n ┌─ READING COMPLETE ───────────────────────" );
124+ System .out .println ("│ Records read: " + places .size ());
125+ System .out .println ("│ Incomplete records ignored: " + nullCount );
126+ System .out .println ("└──────────────────────────────────────────" );
127+
128+ return places ;
129+ }
130+
131+ /**
132+ * Apply K-means clustering to the places
133+ */
134+ private static void applyKMeansClustering (List <PopulatedPlace > places ) {
135+ System .out .println ("\n " + "=" .repeat (60 ));
136+ System .out .println ("Applying K-means Clustering" );
137+ System .out .println ("=" .repeat (60 ));
138+ System .out .println ("Algorithm: K-means" );
139+ System .out .println ("Number of clusters: " + NUM_CLUSTERS );
140+ System .out .println ("Number of points: " + places .size ());
141+
142+ long startTime = System .currentTimeMillis ();
143+
144+ // Create array of geometry pointers
145+ int count = places .size ();
146+ Runtime runtime = Runtime .getSystemRuntime ();
147+ Pointer geometriesPtr = Memory .allocate (runtime , count * 8 ); // 8 bytes per pointer
148+
149+ // Fill array with geometry pointers
150+ for (int i = 0 ; i < count ; i ++) {
151+ geometriesPtr .putPointer (i * 8 , places .get (i ).geom );
152+ }
153+
154+ // Apply K-means clustering
155+ Pointer clustersPtr = geo_cluster_kmeans (geometriesPtr , count , NUM_CLUSTERS );
156+
157+ long elapsed = System .currentTimeMillis () - startTime ;
158+
159+ // Read cluster assignments
160+ int [] clusters = new int [count ];
161+ clustersPtr .get (0 , clusters , 0 , count );
162+
163+ // Assign clusters to places
164+ for (int i = 0 ; i < count ; i ++) {
165+ places .get (i ).cluster = clusters [i ];
166+ }
167+
168+ System .out .println ("\n ┌─ CLUSTERING COMPLETE ────────────────────" );
169+ System .out .println ("│ Time taken: " + elapsed + " ms" );
170+ System .out .println ("│ Clusters created: " + NUM_CLUSTERS );
171+ System .out .println ("└──────────────────────────────────────────" );
172+ }
173+
174+
175+ /**
176+ * Display sample places from each cluster
177+ */
178+ private static void displaySamplePlaces (List <PopulatedPlace > places ) {
179+ System .out .println ("\n ┌─ SAMPLE PLACES PER CLUSTER ──────────────" );
180+
181+ for (int cluster = 0 ; cluster < NUM_CLUSTERS ; cluster ++) {
182+ System .out .println ("│" );
183+ System .out .println ("│ Cluster " + cluster + ":" );
184+
185+ final int currentCluster = cluster ;
186+ List <PopulatedPlace > clusterSamples = places .stream ()
187+ .filter (p -> p .cluster == currentCluster )
188+ .limit (NUM_SAMPLES ) // 5 samples
189+ .toList ();
190+
191+ for (PopulatedPlace place : clusterSamples ) {
192+ String geomStr = geo_out (place .geom );
193+ System .out .printf ("│ - %s (pop: %,d) %s\n " ,
194+ place .name , place .population , geomStr );
195+ }
196+
197+ if (clusterSamples .isEmpty ()) {
198+ System .out .println ("│ (No places assigned to this cluster)" );
199+ }
200+ }
201+
202+ System .out .println ("└──────────────────────────────────────────" );
203+ }
204+
205+ /**
206+ * Write clustered results to CSV file
207+ */
208+ private static void writeResults (List <PopulatedPlace > places ) throws IOException {
209+ System .out .println ("\n " + "=" .repeat (60 ));
210+ System .out .println ("Writing Results to CSV" );
211+ System .out .println ("=" .repeat (60 ));
212+
213+ try (BufferedWriter writer = new BufferedWriter (new FileWriter (OUTPUT_FILE ))) {
214+ // Write header
215+ writer .write ("name,pop_max,geom,cluster\n " );
216+
217+ // Write data
218+ for (PopulatedPlace place : places ) {
219+ String geomStr = geo_out (place .geom );
220+ writer .write (String .format ("%s,%d,%s,%d\n " ,
221+ place .name , place .population , geomStr , place .cluster ));
222+ }
223+ }
224+
225+ System .out .println ("\n ┌─ OUTPUT COMPLETE ────────────────────────" );
226+ System .out .println ("│ File: " + OUTPUT_FILE );
227+ System .out .println ("│ Records written: " + places .size ());
228+ System .out .println ("└──────────────────────────────────────────" );
229+ }
230+
231+ /**
232+ * Explain K-means clustering concept
233+ */
234+ private static void explainKMeans () {
235+ System .out .println ("\n ╔═══════════════════════════════════════════════════════════╗" );
236+ System .out .println ("║ K-MEANS CLUSTERING EXPLAINED ║" );
237+ System .out .println ("╚═══════════════════════════════════════════════════════════╝" );
238+ System .out .println ();
239+ System .out .println ("What is K-means Clustering?" );
240+ System .out .println ("──────────────────────────" );
241+ System .out .println ("K-means groups geographic points into K clusters where each" );
242+ System .out .println ("point belongs to the cluster with the nearest center (centroid)." );
243+ System .out .println ();
244+ System .out .println ("How it works:" );
245+ System .out .println ("─────────────" );
246+ System .out .println ("1. Choose K initial cluster centers (randomly or smart placement)" );
247+ System .out .println ("2. Assign each point to the nearest cluster center" );
248+ System .out .println ("3. Recalculate cluster centers (average position of all points)" );
249+ System .out .println ("4. Repeat steps 2-3 until centers don't move significantly" );
250+ System .out .println ();
251+ System .out .println ("Example with cities:" );
252+ System .out .println ("───────────────────" );
253+ System .out .println ();
254+ System .out .println (" Before clustering: After K-means (K=3):" );
255+ System .out .println (" ┌──────────────┐ ┌──────────────┐" );
256+ System .out .println (" │ • • • • • │ │ A A A B B │" );
257+ System .out .println (" │ • • • │ │ A A B │" );
258+ System .out .println (" │ • • • │ │ A A B │" );
259+ System .out .println (" │ • • • • │ │ C C C C │" );
260+ System .out .println (" │ • • • • │ │ C C C C │" );
261+ System .out .println (" └──────────────┘ └──────────────┘" );
262+ System .out .println ();
263+ System .out .println (" Random points 3 clear clusters!" );
264+ System .out .println ();
265+ System .out .println ("Use Cases:" );
266+ System .out .println ("──────────" );
267+ System .out .println ("✓ Delivery zones - Group customers by proximity" );
268+ System .out .println ("✓ Sales territories - Divide regions for sales teams" );
269+ System .out .println ("✓ Service areas - Emergency services coverage zones" );
270+ System .out .println ("✓ Urban planning - Identify city districts" );
271+ System .out .println ("✓ Retail analysis - Store location optimization" );
272+ System .out .println ();
273+ System .out .println ("Advantages:" );
274+ System .out .println ("───────────" );
275+ System .out .println ("• Fast and efficient" );
276+ System .out .println ("• Simple to understand" );
277+ System .out .println ("• Works well with large datasets" );
278+ System .out .println ("• Produces compact, spherical clusters" );
279+ System .out .println ();
280+ System .out .println ("Limitations:" );
281+ System .out .println ("────────────" );
282+ System .out .println ("• Must specify K (number of clusters) in advance" );
283+ System .out .println ("• Assumes roughly equal-sized, spherical clusters" );
284+ System .out .println ("• Sensitive to initial center placement" );
285+ System .out .println ("• May not work well with irregularly shaped regions" );
286+ System .out .println ();
287+ }
288+
289+ public static void main (String [] args ) {
290+ // Initialize MEOS
291+ meos_initialize ();
292+ meos_initialize_timezone ("UTC" );
293+
294+ try {
295+ System .out .println ("\n " + "=" .repeat (60 ));
296+ System .out .println ("K-MEANS SPATIAL CLUSTERING DEMONSTRATION" );
297+ System .out .println ("Populated Places Dataset" );
298+ System .out .println ("=" .repeat (60 ));
299+
300+ // Explain the concept
301+ explainKMeans ();
302+
303+ // Read data
304+ List <PopulatedPlace > places = readPlaces ();
305+
306+ if (places .isEmpty ()) {
307+ System .err .println ("No data loaded. Please ensure " + INPUT_FILE + " exists." );
308+ return ;
309+ }
310+
311+ // Apply clustering
312+ applyKMeansClustering (places );
313+
314+ // Display samples
315+ displaySamplePlaces (places );
316+
317+ // Write results
318+ writeResults (places );
319+
320+ System .out .println ("\n " + "=" .repeat (60 ));
321+ System .out .println ("CLUSTERING DEMONSTRATION COMPLETED!" );
322+ System .out .println ("=" .repeat (60 ));
323+ System .out .println ();
324+ System .out .println ("Key Takeaways:" );
325+ System .out .println ("──────────────" );
326+ System .out .println ("1. K-means groups points by proximity into K clusters" );
327+ System .out .println ("2. Useful for creating geographic zones and territories" );
328+ System .out .println ("3. Fast algorithm suitable for large datasets" );
329+ System .out .println ("4. Results depend on K value and initial placement" );
330+ System .out .println ();
331+
332+ } catch (IOException e ) {
333+ System .err .println ("Error: " + e .getMessage ());
334+ e .printStackTrace ();
335+ } finally {
336+ // Finalize MEOS
337+ meos_finalize ();
338+ }
339+ }
340+ }
0 commit comments