Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions tdigest/include/tdigest_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ tdigest(false, k, std::numeric_limits<T>::infinity(), -std::numeric_limits<T>::i
template<typename T, typename A>
void tdigest<T, A>::update(T value) {
if (std::isnan(value)) return;
if (std::isinf(value)) return;
if (buffer_.size() == centroids_capacity_ * BUFFER_MULTIPLIER) compress();
buffer_.push_back(value);
min_ = std::min(min_, value);
Expand Down Expand Up @@ -94,6 +95,7 @@ template<typename T, typename A>
double tdigest<T, A>::get_rank(T value) const {
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
if (std::isnan(value)) throw std::invalid_argument("operation is undefined for NaN");
if (std::isinf(value)) throw std::invalid_argument("operation is undefined for infinity");
Comment thread
proost marked this conversation as resolved.
Outdated
Comment thread
proost marked this conversation as resolved.
Outdated
if (value < min_) return 0;
if (value > max_) return 1;
// one centroid and value == min_ == max_
Expand Down Expand Up @@ -621,6 +623,9 @@ void tdigest<T, A>::check_split_points(const T* values, uint32_t size) {
if (std::isnan(values[i])) {
throw std::invalid_argument("Values must not be NaN");
}
if (std::isinf(values[i])) {
throw std::invalid_argument("Values must not be infinity");
}
if ((i < (size - 1)) && !(values[i] < values[i + 1])) {
throw std::invalid_argument("Values must be unique and monotonically increasing");
}
Expand Down
53 changes: 53 additions & 0 deletions tdigest/test/tdigest_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -470,4 +470,57 @@ TEST_CASE("iterate centroids", "[tdigest]") {
REQUIRE(td.get_total_weight() == total_weight);
}

// Updating with +infinity must leave the sketch untouched: after two finite
// updates the total weight is still 2 and the maximum is still the largest
// finite value that was fed in.
TEST_CASE("update rejects positive infinity", "[tdigest]") {
  const double pos_inf = std::numeric_limits<double>::infinity();

  tdigest_double sketch(100);
  sketch.update(1.0);
  sketch.update(2.0);
  sketch.update(pos_inf);

  REQUIRE(sketch.get_total_weight() == 2);
  REQUIRE(sketch.get_max_value() == 2.0);
}

// Updating with -infinity must leave the sketch untouched: after two finite
// updates the total weight is still 2 and the minimum is still the smallest
// finite value that was fed in.
TEST_CASE("update rejects negative infinity", "[tdigest]") {
  const double neg_inf = -std::numeric_limits<double>::infinity();

  tdigest_double sketch(100);
  sketch.update(1.0);
  sketch.update(2.0);
  sketch.update(neg_inf);

  REQUIRE(sketch.get_total_weight() == 2);
  REQUIRE(sketch.get_min_value() == 1.0);
}

// Asking a non-empty sketch for the rank of +infinity is expected to raise
// std::invalid_argument.
TEST_CASE("get_rank rejects positive infinity", "[tdigest]") {
  tdigest_double sketch(100);
  sketch.update(1.0);
  sketch.update(2.0);

  const double pos_inf = std::numeric_limits<double>::infinity();
  REQUIRE_THROWS_AS(sketch.get_rank(pos_inf), std::invalid_argument);
}

// Asking a non-empty sketch for the rank of -infinity is expected to raise
// std::invalid_argument.
TEST_CASE("get_rank rejects negative infinity", "[tdigest]") {
  tdigest_double sketch(100);
  sketch.update(1.0);
  sketch.update(2.0);

  const double neg_inf = -std::numeric_limits<double>::infinity();
  REQUIRE_THROWS_AS(sketch.get_rank(neg_inf), std::invalid_argument);
}

// A split-point array whose last entry is +infinity must make get_CDF throw
// std::invalid_argument. The sketch is filled with the integers 0..99 first so
// that it is not empty.
TEST_CASE("get_CDF rejects positive infinity in split points", "[tdigest]") {
  tdigest_double sketch(100);
  for (int value = 0; value < 100; ++value) {
    sketch.update(value);
  }

  const double points[2] = {50.0, std::numeric_limits<double>::infinity()};
  REQUIRE_THROWS_AS(sketch.get_CDF(points, 2), std::invalid_argument);
}

// A split-point array whose first entry is -infinity must make get_CDF throw
// std::invalid_argument. The sketch is filled with the integers 0..99 first so
// that it is not empty.
TEST_CASE("get_CDF rejects negative infinity in split points", "[tdigest]") {
  tdigest_double sketch(100);
  for (int value = 0; value < 100; ++value) {
    sketch.update(value);
  }

  const double points[2] = {-std::numeric_limits<double>::infinity(), 50.0};
  REQUIRE_THROWS_AS(sketch.get_CDF(points, 2), std::invalid_argument);
}

// A single +infinity split point must make get_PMF throw
// std::invalid_argument. The sketch is filled with the integers 0..99 first so
// that it is not empty.
TEST_CASE("get_PMF rejects infinity in split points", "[tdigest]") {
  tdigest_double sketch(100);
  for (int value = 0; value < 100; ++value) {
    sketch.update(value);
  }

  const double points[1] = {std::numeric_limits<double>::infinity()};
  REQUIRE_THROWS_AS(sketch.get_PMF(points, 1), std::invalid_argument);
}

} /* namespace datasketches */