Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 28 additions & 23 deletions cpp/src/gandiva/precompiled/time.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ extern "C" {
#include "./time_constants.h"
#include "./time_fields.h"
#include "./types.h"
#include "arrow/util/macros.h"

#define MINS_IN_HOUR 60
#define SECONDS_IN_MINUTE 60
Expand Down Expand Up @@ -566,6 +567,28 @@ bool is_valid_time(const int hours, const int minutes, const int seconds) {
seconds < 60;
}

// Normalize sub-seconds value to milliseconds precision (3 digits).
// Truncates if more than 3 digits are provided, pads with zeros if fewer than 3 digits
ARROW_FORCE_INLINE
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might use the regular 'inline'. I don't see ARROW_FORCE_INLINE really being used anywhere and I don't feel like I know enough that this function should be forced inline.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that 'FORCE_INLINE' is used a lot, so maybe that is ok.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see ARROW_FORCE_INLINE really being used anywhere

That's right, ARROW_FORCE_INLINE is used only in this file: https://github.com/akravchukdremio/arrow/blob/GH-48866/cpp/src/arrow/compute/kernels/gather_internal.h#L55. Initially I've used FORCE_INLINE, which is used in many places as far as I can see (around 10 files in gandiva), but then I've changed it to ARROW_FORCE_INLINE to address this comment from committer: #48867 (comment).

I don't feel like I know enough that this function should be forced inline

This is just a helper function, initially I've done my changes directly in castTIMESTAMP_utf8 and castTIME_utf8 functions: b377b59, but then there was a comment to factor out a common code: #48867 (comment) - so I decided to create small function for that.

Why I've used FORCE_INLINE initially? Because there was already a function in this file with such macros, see here: https://github.com/akravchukdremio/arrow/blob/GH-48866/cpp/src/gandiva/precompiled/time.cc#L564-L568, and I've decided to use it too to make our code execution faster (gandiva is aiming for it, AFAIK), since invoking function is an action to put new variables in the call stack. But I agree, that probably just inline keyword would be enough, function is not too complex, so compiler seems anyway will inline it by just having inline keyword.

@kou what do you think, is it fine to just remove ARROW_FORCE_INLINE and use only inline keyword instead?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that 'FORCE_INLINE' is used a lot, so maybe that is ok.

It was initially there, then I've changed it to ARROW_FORCE_INLINE to address another review comment: #48867 (comment)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we don't need force inline, inline is OK.

For FORCE_INLINE and ARROW_FORCE_INLINE: I want to remove FORCE_INLINE and GDV_FORCE_INLINE and use ARROW_FORCE_INLINE instead of them. We don't need multiple similar definitions in the same project.

static inline int32_t normalize_subseconds_to_millis(int32_t subseconds,
int32_t num_digits) {
if (num_digits <= 0 || num_digits == 3) {
// No need to adjust
return subseconds;
}
// Calculate the power of 10 adjustment needed
int32_t digit_diff = num_digits - 3;
while (digit_diff > 0) {
subseconds /= 10;
digit_diff--;
}
while (digit_diff < 0) {
subseconds *= 10;
digit_diff++;
}
return subseconds;
}

// MONTHS_BETWEEN returns number of months between dates date1 and date2.
// If date1 is later than date2, then the result is positive.
// If date1 is earlier than date2, then the result is negative.
Expand Down Expand Up @@ -746,17 +769,8 @@ gdv_timestamp castTIMESTAMP_utf8(int64_t context, const char* input, gdv_int32 l
}

// adjust the milliseconds
if (sub_seconds_len > 0) {
if (sub_seconds_len > 3) {
const char* msg = "Invalid millis for timestamp value ";
set_error_for_date(length, input, msg, context);
return 0;
}
while (sub_seconds_len < 3) {
ts_fields[TimeFields::kSubSeconds] *= 10;
sub_seconds_len++;
}
}
ts_fields[TimeFields::kSubSeconds] =
normalize_subseconds_to_millis(ts_fields[TimeFields::kSubSeconds], sub_seconds_len);
// handle timezone
if (encountered_zone) {
int err = 0;
Expand Down Expand Up @@ -866,18 +880,9 @@ gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) {
}

// adjust the milliseconds
if (sub_seconds_len > 0) {
if (sub_seconds_len > 3) {
const char* msg = "Invalid millis for time value ";
set_error_for_date(length, input, msg, context);
return 0;
}

while (sub_seconds_len < 3) {
time_fields[TimeFields::kSubSeconds - TimeFields::kHours] *= 10;
sub_seconds_len++;
}
}
time_fields[TimeFields::kSubSeconds - TimeFields::kHours] =
normalize_subseconds_to_millis(
time_fields[TimeFields::kSubSeconds - TimeFields::kHours], sub_seconds_len);

int32_t input_hours = time_fields[TimeFields::kHours - TimeFields::kHours];
int32_t input_minutes = time_fields[TimeFields::kMinutes - TimeFields::kHours];
Expand Down
54 changes: 39 additions & 15 deletions cpp/src/gandiva/precompiled/time_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,26 @@ TEST(TestTime, TestCastTimestamp) {
"Not a valid time for timestamp value 2000-01-01 00:00:100");
context.Reset();

EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.0001", 24), 0);
EXPECT_EQ(context.get_error(),
"Invalid millis for timestamp value 2000-01-01 00:00:00.0001");
context.Reset();

EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1000", 24), 0);
EXPECT_EQ(context.get_error(),
"Invalid millis for timestamp value 2000-01-01 00:00:00.1000");
context.Reset();
// Test truncation of subseconds to 3 digits (milliseconds)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add some test cases with less than three digits?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// "2000-01-01 00:00:00.0001" should truncate to "2000-01-01 00:00:00.000"
EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.0001", 24),
castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.000", 23));

// "2000-01-01 00:00:00.1000" should truncate to "2000-01-01 00:00:00.100"
EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1000", 24),
castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.100", 23));

// "2000-01-01 00:00:00.123456789" should truncate to "2000-01-01 00:00:00.123"
EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.123456789", 29),
castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.123", 23));

// "2000-01-01 00:00:00.1999" should truncate to "2000-01-01 00:00:00.199"
EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1999", 24),
castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.199", 23));

// "2000-01-01 00:00:00.1994" should truncate to "2000-01-01 00:00:00.199"
EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1994", 24),
castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.199", 23));
}

TEST(TestTime, TestCastTimeUtf8) {
Expand Down Expand Up @@ -166,13 +177,26 @@ TEST(TestTime, TestCastTimeUtf8) {
EXPECT_EQ(context.get_error(), "Not a valid time value 00:00:100");
context.Reset();

EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.0001", 13), 0);
EXPECT_EQ(context.get_error(), "Invalid millis for time value 00:00:00.0001");
context.Reset();
// Test truncation of subseconds to 3 digits (milliseconds)
// "00:00:00.0001" should truncate to "00:00:00.000"
EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.0001", 13),
castTIME_utf8(context_ptr, "00:00:00.000", 12));

EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1000", 13), 0);
EXPECT_EQ(context.get_error(), "Invalid millis for time value 00:00:00.1000");
context.Reset();
// "00:00:00.1000" should truncate to "00:00:00.100"
EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1000", 13),
castTIME_utf8(context_ptr, "00:00:00.100", 12));

// "9:45:30.123456789" should truncate to "9:45:30.123"
EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.123456789", 17),
castTIME_utf8(context_ptr, "9:45:30.123", 11));

// "00:00:00.1999" should truncate to "00:00:00.199"
EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1999", 13),
castTIME_utf8(context_ptr, "00:00:00.199", 12));

// "00:00:00.1994" should truncate to "00:00:00.199"
EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1994", 13),
castTIME_utf8(context_ptr, "00:00:00.199", 12));
}

#ifndef _WIN32
Expand Down