diff --git a/.github/scripts/benchmark_maps.sh b/.github/scripts/benchmark_maps.sh index 26e483ec..1afbafb6 100755 --- a/.github/scripts/benchmark_maps.sh +++ b/.github/scripts/benchmark_maps.sh @@ -9,7 +9,6 @@ BASE_BIN=$2 OUT_JSON=$3 MAP_ROOT="sample_data" -WARMUP=1 MIN_RUNS=6 MAX_RUNS=15 @@ -81,10 +80,10 @@ for dir in "$MAP_ROOT"/*; do echo "Benchmarking: $map_name" hyperfine --ignore-failure \ - --warmup "$WARMUP" \ --min-runs "$MIN_RUNS" \ --max-runs "$MAX_RUNS" \ --export-json "hf.json" \ + --prepare 'bash -lc "if [ -e /proc/sys/vm/drop_caches ]; then sudo -n sh -c '\''sync; echo 3 > /proc/sys/vm/drop_caches'\'' >/dev/null 2>&1 || true; fi"' \ --command-name main \ "bash -c '[[ \$4 =~ world ]] && set -- \"\$1\" \"\$2\" \"\$3\" --world || set -- \"\$1\" \"\$2\" \"\$3\"; exec \"\$@\"' _ \ \"$BASE_BIN\" \"$geo\" \"$csv\" \"$dir\"" \ diff --git a/include/inset_state.hpp b/include/inset_state.hpp index df3fb5cb..c63dc39c 100644 --- a/include/inset_state.hpp +++ b/include/inset_state.hpp @@ -116,6 +116,8 @@ class InsetState // Whether convergence has been reached mutable bool converge_{true}; + mutable bool intersections_found_{false}; + // Make default constructor private so that only // InsetState(const std::string, Arguments) can be called as constructor InsetState(); diff --git a/include/parse_arguments.hpp b/include/parse_arguments.hpp index 063ebb49..2bc9efde 100644 --- a/include/parse_arguments.hpp +++ b/include/parse_arguments.hpp @@ -35,7 +35,7 @@ struct Arguments { bool qtdt_method; // Should the polygons be simplified and densified? - bool simplify; + bool disable_simplification_densification; // If `rays` is true, we use the ray-shooting method to fill the grid cells. bool rays; @@ -72,7 +72,11 @@ struct Arguments { bool verbose; - bool disable_triangulation_optimisation; + // Timeout in seconds + unsigned int timeout_in_seconds; + + // Whether to exit gracefully if intersections are found + bool do_not_fail_on_intersections; // Column names in provided visual variables file (CSV) std::optional id_col; diff --git a/include/time_tracker.hpp b/include/time_tracker.hpp index c2ebd507..354cd6b9 100644 --- a/include/time_tracker.hpp +++ b/include/time_tracker.hpp @@ -13,7 +13,12 @@ class TimeTracker std::unordered_map durations_; std::string name_; + std::chrono::steady_clock::time_point program_start_; + public: + TimeTracker(); + explicit TimeTracker(std::string name); + void set_name(std::string); void start(const std::string &task_name); void stop(const std::string &task_name); @@ -22,6 +27,9 @@ class TimeTracker // Find the duration of a particular task std::chrono::milliseconds duration(const std::string &task_name) const; + + // Total elapsed time from the CTOR of the object till now in seconds + double total_elapsed_time_in_seconds() const; }; #endif // TIME_TRACKER_H diff --git a/run_maps.sh b/run_maps.sh index c295292b..ac1e5ac7 100755 --- a/run_maps.sh +++ b/run_maps.sh @@ -46,7 +46,7 @@ for map_dir in */; do extra="--world" fi - command="../build/Release/cartogram \"../sample_data/${geojson_file}\" \"../sample_data/${csv_file}\" $extra" + command="../build/Debug/cartogram \"../sample_data/${geojson_file}\" \"../sample_data/${csv_file}\" $extra" echo "Command: $command" # Show the full path from the script location for the current map folder diff --git a/src/cartogram_info/read_csv.cpp b/src/cartogram_info/read_csv.cpp index f7b106ea..3a175ff3 100644 --- a/src/cartogram_info/read_csv.cpp +++ b/src/cartogram_info/read_csv.cpp @@ -221,7 +221,7 @@ static void check_validity_of_csv_ids( for (const auto &id : initial_id_order) { if (std::find(csv_ids.begin(), csv_ids.end(), id) == csv_ids.end()) { - std::cerr << "Warning: ID " << id << " in GeoJSON is not in CSV" + std::cerr << "WARNING: ID " << id << " in GeoJSON is not in CSV" << std::endl; csv_data[id] = {{"area", "NA"}, {"color", ""}, {"label", ""}, {"inset_pos", "C"}}; diff --git a/src/inset_state/check_topology.cpp b/src/inset_state/check_topology.cpp index 5272b1d7..30f6a13d 100644 --- a/src/inset_state/check_topology.cpp +++ b/src/inset_state/check_topology.cpp @@ -30,13 +30,21 @@ void InsetState::holes_inside_polygons() const void InsetState::is_simple(const char *caller_func) const { - if (!args_.simplify) + if (args_.disable_simplification_densification) return; // Only check topology if simplification and densification is enabled. for (const auto &gd : geo_divs_) { for (const auto &pwh : gd.polygons_with_holes()) { if (!pwh.outer_boundary().is_simple()) { + intersections_found_ = true; + if (args_.do_not_fail_on_intersections) { + std::cerr << "WARNING: Outer boundary is not simple for GeoDiv " + << gd.id(); + std::cerr << ". is_simple() called from " << caller_func + << std::endl; + continue; + } not_simple_polygon_ = pwh.outer_boundary(); std::cerr << "ERROR: Outer boundary is not simple for GeoDiv " << gd.id(); @@ -47,8 +55,16 @@ void InsetState::is_simple(const char *caller_func) const false); exit(1); } + for (const auto &h : pwh.holes()) { if (!h.is_simple()) { + intersections_found_ = true; + if (args_.do_not_fail_on_intersections) { + std::cerr << "WARNING: Hole is not simple for GeoDiv " << gd.id(); + std::cerr << ". is_simple() called from " << caller_func + << std::endl; + continue; + } not_simple_polygon_ = h; std::cerr << "ERROR: Hole is not simple for GeoDiv " << gd.id(); std::cerr << ". is_simple() called from " << caller_func diff --git a/src/inset_state/inset_state.cpp b/src/inset_state/inset_state.cpp index afd332e2..7a746671 100644 --- a/src/inset_state/inset_state.cpp +++ b/src/inset_state/inset_state.cpp @@ -84,61 +84,6 @@ double InsetState::blur_width() const return blur_width; } -bool InsetState::continue_integrating() const -{ - - // Calculate all the necessary information to decide whether to continue - auto [max_area_err, worst_gd] = max_area_error(); - - // A GeoDiv is still above our area error threshold - bool area_error_above_threshold = - max_area_err > args_.max_permitted_area_error; - - // Area expansion factor is above our threshold - // i.e. cartogram has become too big or too small - double area_drift = area_expansion_factor() - 1.0; - bool area_expansion_factor_above_threshold = - std::abs(area_drift) > max_permitted_area_drift; - - // If both the above metrics are above our threshold - bool has_converged = - !area_error_above_threshold && !area_expansion_factor_above_threshold; - - // Make sure to not continue endlesslely: cap at max_integrations - bool within_integration_limit = n_finished_integrations() < max_integrations; - - // We continue if we are within the integration limit and have not converged - bool continue_integration = - (within_integration_limit && !has_converged) || - (n_finished_integrations_ < args_.min_integrations); - - // Actually hasn't converged, just reached integration limit - if (!within_integration_limit && !has_converged) { - converge_ = false; - std::cerr << "ERROR: Could not converge!" << std::endl; - if (area_error_above_threshold) - std::cerr << "Max area error above threshold!" << std::endl; - if (area_expansion_factor_above_threshold) - std::cerr << "Area expansion factor above threshold!" << std::endl; - } - - // Print control output (at end of previous integration) - std::cerr << "Max. area err: " << max_area_err << ", GeoDiv: " << worst_gd - << std::endl; - std::cerr << "Current Area: " << geo_div_at_id(worst_gd).area() - << ", Target Area: " << target_area_at(worst_gd) << std::endl; - std::cerr << "Area drift: " << area_drift * 100.0 << "%" << std::endl; - - if (continue_integration) { - // Print next integration information. - std::cerr << "\nIntegration number " << n_finished_integrations() - << std::endl; - std::cerr << "Dimensions : " << lx_ << " " << ly_ << std::endl; - std::cerr << "Number of Points: " << n_points() << std::endl; - } - return continue_integration; -} - Color InsetState::color_at(const std::string &id) const { try { diff --git a/src/inset_state/integrate.cpp b/src/inset_state/integrate.cpp index a3ec4c9c..81ba7efd 100644 --- a/src/inset_state/integrate.cpp +++ b/src/inset_state/integrate.cpp @@ -17,7 +17,7 @@ void InsetState::preprocess() store_original_geo_divs(); } - if (args_.simplify) { + if (!args_.disable_simplification_densification) { std::cerr << "Start of initial simplification of " << pos_ << std::endl; // Simplification reduces the number of points used to represent the @@ -78,6 +78,73 @@ void InsetState::cleanup_after_integration() ref_to_fluxy_init().free(); } +bool InsetState::continue_integrating() const +{ + if (intersections_found_) { + std::cerr << "WARNING: Polygons do not remain simple during integration! " + "Exiting gracefully." + << std::endl; + return false; + } + + // Calculate all the necessary information to decide whether to continue + auto [max_area_err, worst_gd] = max_area_error(); + + // A GeoDiv is still above our area error threshold + bool area_error_above_threshold = + max_area_err > args_.max_permitted_area_error; + + // Area expansion factor is above our threshold + // i.e. cartogram has become too big or too small + double area_drift = area_expansion_factor() - 1.0; + bool area_expansion_factor_above_threshold = + std::abs(area_drift) > max_permitted_area_drift; + + // If both the above metrics are above our threshold + bool has_converged = + !area_error_above_threshold && !area_expansion_factor_above_threshold; + + // Make sure to not continue endlesslely: cap at max_integrations + bool within_integration_limit = n_finished_integrations() < max_integrations; + + // We continue if we are within the integration limit and have not converged + bool continue_integration = + (within_integration_limit && !has_converged) || + (n_finished_integrations_ < args_.min_integrations); + + // Actually hasn't converged, just reached integration limit + if (!within_integration_limit && !has_converged) { + converge_ = false; + std::cerr << "ERROR: Could not converge!" << std::endl; + if (area_error_above_threshold) + std::cerr << "Max area error above threshold!" << std::endl; + if (area_expansion_factor_above_threshold) + std::cerr << "Area expansion factor above threshold!" << std::endl; + } + + // Print control output (at end of previous integration) + std::cerr << "Max. area err: " << max_area_err << ", GeoDiv: " << worst_gd + << std::endl; + std::cerr << "Current Area: " << geo_div_at_id(worst_gd).area() + << ", Target Area: " << target_area_at(worst_gd) << std::endl; + std::cerr << "Area drift: " << area_drift * 100.0 << "%" << std::endl; + + if (timer.total_elapsed_time_in_seconds() > args_.timeout_in_seconds) { + std::cerr << "WARNING: Timeout of " << args_.timeout_in_seconds + << " seconds reached!" << std::endl; + return false; + } + + if (continue_integration) { + // Print next integration information. + std::cerr << "\nIntegration number " << n_finished_integrations() + << std::endl; + std::cerr << "Dimensions : " << lx_ << " " << ly_ << std::endl; + std::cerr << "Number of Points: " << n_points() << std::endl; + } + return continue_integration; +} + void InsetState::integrate(ProgressTracker &progress_tracker) { diff --git a/src/inset_state/project.cpp b/src/inset_state/project.cpp index cb0058bd..89333d0b 100644 --- a/src/inset_state/project.cpp +++ b/src/inset_state/project.cpp @@ -8,7 +8,7 @@ bool InsetState::project() if (!create_delaunay_t()) // Triangle has flipped during triangulation return false; - if (args_.simplify) { + if (!args_.disable_simplification_densification) { densify_geo_divs_using_delaunay_t(); } @@ -28,7 +28,7 @@ bool InsetState::project() true); } - if (args_.simplify) { + if (!args_.disable_simplification_densification) { simplify(args_.target_points_per_inset); } diff --git a/src/inset_state/simplify_inset.cpp b/src/inset_state/simplify_inset.cpp index abf1dc1a..59febe1c 100644 --- a/src/inset_state/simplify_inset.cpp +++ b/src/inset_state/simplify_inset.cpp @@ -16,6 +16,13 @@ using Cost = CGAL::Polyline_simplification_2::Squared_distance_cost; void InsetState::simplify(const unsigned int target_points_per_inset) { + if (intersections_found_) { + std::cerr + << "WARNING: Intersections found previously; skipping simplification!" + << std::endl; + return; + } + timer.start("Simplification"); const size_t n_pts_before = n_points(); std::cerr << n_pts_before << " points in inset. "; diff --git a/src/misc/parse_arguments.cpp b/src/misc/parse_arguments.cpp index 49125cfb..79bc055d 100644 --- a/src/misc/parse_arguments.cpp +++ b/src/misc/parse_arguments.cpp @@ -1,5 +1,6 @@ #include "parse_arguments.hpp" #include "constants.hpp" +#include Arguments parse_arguments(const int argc, const char *argv[]) { @@ -7,7 +8,7 @@ Arguments parse_arguments(const int argc, const char *argv[]) // Create parser for arguments using argparse. // From https://github.com/p-ranav/argparse - argparse::ArgumentParser arguments("./cartogram", "25.7"); + argparse::ArgumentParser arguments("./cartogram", "25.8"); // Positional argument accepting geometry file (GeoJSON, JSON) as input arguments.add_argument("geometry_file") @@ -28,6 +29,12 @@ Arguments parse_arguments(const int argc, const char *argv[]) "Integer: Number of starting grid cells along longer Cartesian " "coordinate axis"); + arguments.add_argument("-T", "--timeout") + .default_value(std::numeric_limits::max()) + .scan<'u', unsigned int>() + .help( + "Integer: Maximum time (in seconds) allowed for cartogram generation"); + arguments.add_argument("-N", "--max_allowed_autoscale_grid_length") .default_value(max_allowed_autoscale_grid_length) .scan<'u', unsigned int>() @@ -71,10 +78,6 @@ Arguments parse_arguments(const int argc, const char *argv[]) "polygons") .default_value(false) .implicit_value(true); - arguments.add_argument("--disable_triangulation_optimisation") - .help("Boolean: Disable optimisation of maximum angle of triangulation") - .default_value(false) - .implicit_value(true); arguments.add_argument("--skip_projection") .help("Boolean: Skip projection to equal area") .default_value(false) @@ -93,6 +96,14 @@ Arguments parse_arguments(const int argc, const char *argv[]) // Currently, this help message is not accurate. .default_value(false) .implicit_value(true); + + arguments.add_argument("--do_not_fail_on_intersections") + .help( + "Boolean: Whether to still produce cartogram if polygons do not remain " + "simple") + .default_value(false) + .implicit_value(true); + arguments.add_argument("--output_shifted_insets") .help( "Boolean: Output repositioned insets in cartesian coordinates GeoJSON") @@ -190,9 +201,8 @@ Arguments parse_arguments(const int argc, const char *argv[]) // Set boolean values args.world = arguments.get("--world"); - args.simplify = !arguments.get("--disable_simplify_and_densify"); - args.disable_triangulation_optimisation = - arguments.get("--disable_triangulation_optimisation"); + args.disable_simplification_densification = + arguments.get("--disable_simplify_and_densify"); args.remove_tiny_polygons = arguments.get("--remove_tiny_polygons"); args.min_polygon_area = arguments.get("--minimum_polygon_area"); args.max_permitted_area_error = @@ -211,6 +221,10 @@ Arguments parse_arguments(const int argc, const char *argv[]) args.plot_polygons = arguments.get("--plot_polygons"); args.plot_quadtree = arguments.get("--plot_quadtree"); args.output_shifted_insets = arguments.get("--output_shifted_insets"); + args.do_not_fail_on_intersections = + arguments.get("--do_not_fail_on_intersections"); + + args.timeout_in_seconds = arguments.get("--timeout"); // arguments.present returns an optional args.id_col = arguments.present("--id"); @@ -223,7 +237,7 @@ Arguments parse_arguments(const int argc, const char *argv[]) args.verbose = arguments.get("--verbose"); // Check whether n_points is specified but --simplify_and_densify not passed - if (!args.simplify) { + if (args.disable_simplification_densification) { std::cerr << "WARNING: Simplification and densification disabled! " << "Polygons will not simplified (or densified). " << "This may result and in polygon intersections. " diff --git a/src/misc/string_to_decimal_converter.cpp b/src/misc/string_to_decimal_converter.cpp index 29e6b322..63693ece 100644 --- a/src/misc/string_to_decimal_converter.cpp +++ b/src/misc/string_to_decimal_converter.cpp @@ -223,7 +223,7 @@ bool StringToDecimalConverter::is_comma_as_separator( } if (!point_as_separator_strs.empty()) { - std::cerr << "Warning: Cannot determine separator with certainty.\n"; + std::cerr << "WARNING: Cannot determine separator with certainty.\n"; std::cerr << "Comma as separator areas:\n"; std::cerr << "Contradictory example areas:\n"; std::cerr << " Comma as separator: " << comma_as_separator_strs[0] diff --git a/src/misc/time_tracker.cpp b/src/misc/time_tracker.cpp index 9d49ed96..0b16a9cf 100644 --- a/src/misc/time_tracker.cpp +++ b/src/misc/time_tracker.cpp @@ -3,9 +3,18 @@ #include #include +TimeTracker::TimeTracker() : program_start_(std::chrono::steady_clock::now()) +{ +} + +TimeTracker::TimeTracker(std::string name) + : name_(std::move(name)), program_start_(std::chrono::steady_clock::now()) +{ +} + void TimeTracker::set_name(std::string name) { - name_ = name; + name_ = std::move(name); } void TimeTracker::start(const std::string &task_name) @@ -67,4 +76,13 @@ std::chrono::milliseconds TimeTracker::duration( // Re-throw, or return a default value throw; } -} \ No newline at end of file +} + +double TimeTracker::total_elapsed_time_in_seconds() const +{ + using clock = std::chrono::steady_clock; + using seconds_d = std::chrono::duration; + + return std::chrono::duration_cast(clock::now() - program_start_) + .count(); +} diff --git a/tests/fuzzer/fuzzer.py b/tests/fuzzer/fuzzer.py old mode 100644 new mode 100755 index c4e51610..b0320b1f --- a/tests/fuzzer/fuzzer.py +++ b/tests/fuzzer/fuzzer.py @@ -116,9 +116,6 @@ def main(): # Run cmd = [carto_bin, geo_path, tmp_csv] - # Make sure the cartogram runs fast - cmd.extend(["--max_allowed_autoscale_grid_length", "512"]) - if "world" in args.single_map.lower(): cmd.append("--world") @@ -148,6 +145,9 @@ def main(): os.remove(tmp_csv) + if n_fail == 0 and os.path.isdir(fail_dir): + os.rmdir(fail_dir) + print("\nFuzzer summary:", "FAILURES detected." if n_fail else "All passed.") print() return 1 if n_fail else 0