Skip to content

Commit 07f7c60

Browse files
protocolstardust, singaraiona
authored and committed
feat(cmp) - add f64 distinct support and temporal filtering combinatorics
1 parent 0a2c747 commit 07f7c60

8 files changed

Lines changed: 186 additions & 23 deletions

File tree

core/cmp.c

Lines changed: 51 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,41 @@ typedef obj_p (*ray_cmp_f)(obj_p, obj_p, i64_t, i64_t, obj_p);
6767
NULL_OBJ; \
6868
})
6969

70+
// Indirection wrappers: force macro expansion before token-pasting in __CMP_*
71+
#define __CMP_A_V_X(x, y, lt, rt, mt, op, ln, of, ov) __CMP_A_V(x, y, lt, rt, mt, op, ln, of, ov)
72+
#define __CMP_V_A_X(x, y, lt, rt, mt, op, ln, of, ov) __CMP_V_A(x, y, lt, rt, mt, op, ln, of, ov)
73+
#define __CMP_V_V_X(x, y, lt, rt, mt, op, ln, of, ov) __CMP_V_V(x, y, lt, rt, mt, op, ln, of, ov)
74+
75+
// Date↔Timestamp: EQ/NE truncate timestamp to date; LT/GT/LE/GE promote date to timestamp
76+
#define __DT_CMP_EQ(dt, ts) EQI32(dt, timestamp_to_date(ts))
77+
#define __DT_CMP_NE(dt, ts) NEI32(dt, timestamp_to_date(ts))
78+
#define __DT_CMP_LT(dt, ts) LTI64(date_to_timestamp(dt), ts)
79+
#define __DT_CMP_GT(dt, ts) GTI64(date_to_timestamp(dt), ts)
80+
#define __DT_CMP_LE(dt, ts) LEI64(date_to_timestamp(dt), ts)
81+
#define __DT_CMP_GE(dt, ts) GEI64(date_to_timestamp(dt), ts)
82+
83+
#define __TD_CMP_EQ(ts, dt) EQI32(timestamp_to_date(ts), dt)
84+
#define __TD_CMP_NE(ts, dt) NEI32(timestamp_to_date(ts), dt)
85+
#define __TD_CMP_LT(ts, dt) LTI64(ts, date_to_timestamp(dt))
86+
#define __TD_CMP_GT(ts, dt) GTI64(ts, date_to_timestamp(dt))
87+
#define __TD_CMP_LE(ts, dt) LEI64(ts, date_to_timestamp(dt))
88+
#define __TD_CMP_GE(ts, dt) GEI64(ts, date_to_timestamp(dt))
89+
90+
// Vector comparison target type and op, per operator
91+
#define __DT_MT_EQ date
92+
#define __DT_MT_NE date
93+
#define __DT_MT_LT timestamp
94+
#define __DT_MT_GT timestamp
95+
#define __DT_MT_LE timestamp
96+
#define __DT_MT_GE timestamp
97+
98+
#define __DT_OP_EQ EQI32
99+
#define __DT_OP_NE NEI32
100+
#define __DT_OP_LT LTI64
101+
#define __DT_OP_GT GTI64
102+
#define __DT_OP_LE LEI64
103+
#define __DT_OP_GE GEI64
104+
70105
#define __DECLARE_CMP_FN(op) \
71106
obj_p ray_##op##_partial(obj_p x, obj_p y, i64_t len, i64_t offset, obj_p res) { \
72107
i64_t i; \
@@ -394,22 +429,22 @@ typedef obj_p (*ray_cmp_f)(obj_p, obj_p, i64_t, i64_t, obj_p);
394429
case MTYPE2(TYPE_F64, TYPE_F64): \
395430
return __CMP_V_V(x, y, f64, f64, f64, op##F64, len, offset, res); \
396431
\
397-
case MTYPE2(-TYPE_DATE, -TYPE_TIMESTAMP): \
398-
return b8(op##F64(date_to_timestamp(x->i32), y->i64)); \
399-
case MTYPE2(-TYPE_DATE, TYPE_TIMESTAMP): \
400-
return __CMP_A_V(x, y, date, timestamp, timestamp, op##I64, len, offset, res); \
401-
case MTYPE2(TYPE_DATE, -TYPE_TIMESTAMP): \
402-
return __CMP_V_A(x, y, date, timestamp, timestamp, op##I64, len, offset, res); \
403-
case MTYPE2(TYPE_DATE, TYPE_TIMESTAMP): \
404-
return __CMP_V_V(x, y, date, timestamp, timestamp, op##I64, len, offset, res); \
405-
case MTYPE2(-TYPE_TIMESTAMP, -TYPE_DATE): \
406-
return b8(op##F64(x->i64, date_to_timestamp(y->i32))); \
407-
case MTYPE2(-TYPE_TIMESTAMP, TYPE_DATE): \
408-
return __CMP_A_V(x, y, timestamp, date, timestamp, op##I64, len, offset, res); \
409-
case MTYPE2(TYPE_TIMESTAMP, -TYPE_DATE): \
410-
return __CMP_V_A(x, y, timestamp, date, timestamp, op##I64, len, offset, res); \
411-
case MTYPE2(TYPE_TIMESTAMP, TYPE_DATE): \
412-
return __CMP_V_V(x, y, timestamp, date, timestamp, op##I64, len, offset, res); \
432+
case MTYPE2(-TYPE_DATE, -TYPE_TIMESTAMP): \
433+
return b8(__DT_CMP_##op(x->i32, y->i64)); \
434+
case MTYPE2(-TYPE_DATE, TYPE_TIMESTAMP): \
435+
return __CMP_A_V_X(x, y, date, timestamp, __DT_MT_##op, __DT_OP_##op, len, offset, res); \
436+
case MTYPE2(TYPE_DATE, -TYPE_TIMESTAMP): \
437+
return __CMP_V_A_X(x, y, date, timestamp, __DT_MT_##op, __DT_OP_##op, len, offset, res); \
438+
case MTYPE2(TYPE_DATE, TYPE_TIMESTAMP): \
439+
return __CMP_V_V_X(x, y, date, timestamp, __DT_MT_##op, __DT_OP_##op, len, offset, res); \
440+
case MTYPE2(-TYPE_TIMESTAMP, -TYPE_DATE): \
441+
return b8(__TD_CMP_##op(x->i64, y->i32)); \
442+
case MTYPE2(-TYPE_TIMESTAMP, TYPE_DATE): \
443+
return __CMP_A_V_X(x, y, timestamp, date, __DT_MT_##op, __DT_OP_##op, len, offset, res); \
444+
case MTYPE2(TYPE_TIMESTAMP, -TYPE_DATE): \
445+
return __CMP_V_A_X(x, y, timestamp, date, __DT_MT_##op, __DT_OP_##op, len, offset, res); \
446+
case MTYPE2(TYPE_TIMESTAMP, TYPE_DATE): \
447+
return __CMP_V_V_X(x, y, timestamp, date, __DT_MT_##op, __DT_OP_##op, len, offset, res); \
413448
\
414449
case MTYPE2(TYPE_ENUM, -TYPE_SYMBOL): \
415450
k = ray_key(x); \

core/compose.c

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -867,6 +867,10 @@ obj_p ray_distinct(obj_p x) {
867867
res = index_distinct_i64(AS_I64(x), l);
868868
res->type = x->type;
869869
return res;
870+
case TYPE_F64:
871+
l = x->len;
872+
res = index_distinct_f64(AS_F64(x), l);
873+
return res;
870874
case TYPE_ENUM:
871875
l = ops_count(x);
872876
res = index_distinct_i64(AS_I64(ENUM_VAL(x)), l);
@@ -991,6 +995,24 @@ obj_p ray_distinct(obj_p x) {
991995
drop_obj(combined);
992996
return res;
993997
}
998+
case TYPE_PARTEDF64: {
999+
obj_p combined = NULL_OBJ;
1000+
for (i64_t i = 0; i < x->len; i++) {
1001+
obj_p part = AS_LIST(x)[i];
1002+
if (combined == NULL_OBJ)
1003+
combined = clone_obj(part);
1004+
else {
1005+
obj_p tmp = ray_concat(combined, part);
1006+
drop_obj(combined);
1007+
combined = tmp;
1008+
}
1009+
}
1010+
if (combined == NULL_OBJ)
1011+
return F64(0);
1012+
res = index_distinct_f64(AS_F64(combined), combined->len);
1013+
drop_obj(combined);
1014+
return res;
1015+
}
9941016
case TYPE_PARTEDGUID: {
9951017
obj_p combined = NULL_OBJ;
9961018
for (i64_t i = 0; i < x->len; i++) {

core/index.c

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -606,6 +606,45 @@ obj_p index_distinct_i64(i64_t values[], i64_t len) {
606606
return vec;
607607
}
608608

609+
// Build a new F64 vector holding the distinct values found in `values[0..len)`.
//
// - NaN entries are skipped and never appear in the result.
// - -0.0 is folded into +0.0 before hashing, so both zeros collapse to one entry.
// - Values are stored in the hash set by their 64-bit pattern; a slot equal to
//   NULL_I64 is treated as empty.
//   NOTE(review): this assumes no stored key has the same bit pattern as
//   NULL_I64 -- confirm against the definition of NULL_I64.
// - The result is emitted in hash-slot order, not in first-occurrence order.
//
// Returns the new vector with ATTR_DISTINCT set; the temporary set is dropped.
obj_p index_distinct_f64(f64_t values[], i64_t len) {
    obj_p seen = ht_oa_create(len, -1);
    i64_t uniq = 0;

    for (i64_t n = 0; n < len; n++) {
        f64_t cur;
        i64_t bits, pos, *slots;

        if (ISNANF64(values[n]))
            continue;

        // Collapse the two IEEE zeros: -0.0 == 0.0 compares true, so rewrite to +0.0
        cur = values[n];
        if (cur == 0.0)
            cur = 0.0;

        // Reinterpret the double as an integer key without violating aliasing rules
        memcpy(&bits, &cur, sizeof(f64_t));

        // The set is passed by address, so the slot array is re-read after every probe
        pos = ht_oa_tab_next(&seen, bits);
        slots = AS_I64(AS_LIST(seen)[0]);

        if (slots[pos] != NULL_I64)
            continue;  // already recorded

        slots[pos] = bits;
        uniq++;
    }

    obj_p result = F64(uniq);
    f64_t *dst = AS_F64(result);

    i64_t *slots = AS_I64(AS_LIST(seen)[0]);
    i64_t cap = AS_LIST(seen)[0]->len;

    // Walk every slot of the table and copy the occupied ones back as doubles
    for (i64_t s = 0; s < cap; s++) {
        if (slots[s] == NULL_I64)
            continue;
        memcpy(dst, &slots[s], sizeof(f64_t));
        dst++;
    }

    drop_obj(seen);
    result->attrs |= ATTR_DISTINCT;

    return result;
}
647+
609648
obj_p index_distinct_guid(guid_t values[], i64_t len) {
610649
i64_t i, j;
611650
i64_t p, *out;

core/index.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ obj_p index_distinct_i8(i8_t values[], i64_t len);
6868
obj_p index_distinct_i16(i16_t values[], i64_t len);
6969
obj_p index_distinct_i32(i32_t values[], i64_t len);
7070
obj_p index_distinct_i64(i64_t values[], i64_t len);
71+
obj_p index_distinct_f64(f64_t values[], i64_t len);
7172
obj_p index_distinct_guid(guid_t values[], i64_t len);
7273
obj_p index_distinct_obj(obj_p values[], i64_t len);
7374
obj_p index_in_i8_i8(i8_t x[], i64_t xl, i8_t y[], i64_t yl);

core/rayforce.c

Lines changed: 31 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2483,8 +2483,9 @@ obj_p cast_obj(i8_t type, obj_p obj) {
24832483
case MTYPE2(-TYPE_DATE, -TYPE_TIME):
24842484
return adate(obj->i32);
24852485
case MTYPE2(-TYPE_DATE, -TYPE_I64):
2486-
case MTYPE2(-TYPE_DATE, -TYPE_TIMESTAMP):
24872486
return adate((i32_t)obj->i64);
2487+
case MTYPE2(-TYPE_DATE, -TYPE_TIMESTAMP):
2488+
return adate((i32_t)(obj->i64 / NANOS_FROM_DAY));
24882489
case MTYPE2(-TYPE_DATE, -TYPE_F64):
24892490
return adate((i32_t)obj->f64);
24902491

@@ -2499,8 +2500,9 @@ obj_p cast_obj(i8_t type, obj_p obj) {
24992500
case MTYPE2(-TYPE_TIME, -TYPE_DATE):
25002501
return atime(obj->i32);
25012502
case MTYPE2(-TYPE_TIME, -TYPE_I64):
2502-
case MTYPE2(-TYPE_TIME, -TYPE_TIMESTAMP):
25032503
return atime((i32_t)obj->i64);
2504+
case MTYPE2(-TYPE_TIME, -TYPE_TIMESTAMP):
2505+
return atime((i32_t)((obj->i64 % NANOS_FROM_DAY) / NANOS_FROM_MILLIS));
25042506
case MTYPE2(-TYPE_TIME, -TYPE_F64):
25052507
return atime((i32_t)obj->f64);
25062508

@@ -2512,9 +2514,11 @@ obj_p cast_obj(i8_t type, obj_p obj) {
25122514
case MTYPE2(-TYPE_TIMESTAMP, -TYPE_I16):
25132515
return timestamp((i64_t)obj->i16);
25142516
case MTYPE2(-TYPE_TIMESTAMP, -TYPE_I32):
2517+
return timestamp((i64_t)obj->i32);
25152518
case MTYPE2(-TYPE_TIMESTAMP, -TYPE_DATE):
2519+
return timestamp((i64_t)obj->i32 * NANOS_FROM_DAY);
25162520
case MTYPE2(-TYPE_TIMESTAMP, -TYPE_TIME):
2517-
return timestamp((i64_t)obj->i32);
2521+
return timestamp((i64_t)obj->i32 * NANOS_FROM_MILLIS);
25182522
case MTYPE2(-TYPE_TIMESTAMP, -TYPE_I64):
25192523
return timestamp(obj->i64);
25202524
case MTYPE2(-TYPE_TIMESTAMP, -TYPE_F64):
@@ -2683,14 +2687,24 @@ obj_p cast_obj(i8_t type, obj_p obj) {
26832687
case MTYPE2(TYPE_I32, TYPE_I64):
26842688
case MTYPE2(TYPE_I32, TYPE_TIMESTAMP):
26852689
case MTYPE2(TYPE_DATE, TYPE_I64):
2686-
case MTYPE2(TYPE_DATE, TYPE_TIMESTAMP):
26872690
case MTYPE2(TYPE_TIME, TYPE_I64):
2688-
case MTYPE2(TYPE_TIME, TYPE_TIMESTAMP):
26892691
l = obj->len;
26902692
res = vector(type, l);
26912693
for (i = 0; i < l; i++)
26922694
AS_I32(res)[i] = (i32_t)AS_I64(obj)[i];
26932695
return res;
2696+
case MTYPE2(TYPE_DATE, TYPE_TIMESTAMP):
2697+
l = obj->len;
2698+
res = vector(TYPE_DATE, l);
2699+
for (i = 0; i < l; i++)
2700+
AS_I32(res)[i] = (i32_t)(AS_I64(obj)[i] / NANOS_FROM_DAY);
2701+
return res;
2702+
case MTYPE2(TYPE_TIME, TYPE_TIMESTAMP):
2703+
l = obj->len;
2704+
res = vector(TYPE_TIME, l);
2705+
for (i = 0; i < l; i++)
2706+
AS_I32(res)[i] = (i32_t)((AS_I64(obj)[i] % NANOS_FROM_DAY) / NANOS_FROM_MILLIS);
2707+
return res;
26942708
case MTYPE2(TYPE_I32, TYPE_DATE):
26952709
case MTYPE2(TYPE_I32, TYPE_TIME):
26962710
case MTYPE2(TYPE_DATE, TYPE_I32):
@@ -2732,13 +2746,23 @@ obj_p cast_obj(i8_t type, obj_p obj) {
27322746
case MTYPE2(TYPE_I64, TYPE_DATE):
27332747
case MTYPE2(TYPE_I64, TYPE_TIME):
27342748
case MTYPE2(TYPE_TIMESTAMP, TYPE_I32):
2735-
case MTYPE2(TYPE_TIMESTAMP, TYPE_DATE):
2736-
case MTYPE2(TYPE_TIMESTAMP, TYPE_TIME):
27372749
l = obj->len;
27382750
res = vector(type, l);
27392751
for (i = 0; i < l; i++)
27402752
AS_I64(res)[i] = (i64_t)AS_I32(obj)[i];
27412753
return res;
2754+
case MTYPE2(TYPE_TIMESTAMP, TYPE_DATE):
2755+
l = obj->len;
2756+
res = vector(TYPE_TIMESTAMP, l);
2757+
for (i = 0; i < l; i++)
2758+
AS_I64(res)[i] = (i64_t)AS_I32(obj)[i] * NANOS_FROM_DAY;
2759+
return res;
2760+
case MTYPE2(TYPE_TIMESTAMP, TYPE_TIME):
2761+
l = obj->len;
2762+
res = vector(TYPE_TIMESTAMP, l);
2763+
for (i = 0; i < l; i++)
2764+
AS_I64(res)[i] = (i64_t)AS_I32(obj)[i] * NANOS_FROM_MILLIS;
2765+
return res;
27422766
case MTYPE2(TYPE_I64, TYPE_F64):
27432767
case MTYPE2(TYPE_TIMESTAMP, TYPE_F64):
27442768
l = obj->len;

tests/casting.c

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -494,6 +494,33 @@ test_result_t test_cast_identity_roundtrip() {
494494
// Roundtrip: timestamp -> i64 -> timestamp
495495
TEST_ASSERT_EQ("(as 'timestamp (as 'i64 2024.01.01D12:30:45.123456789))", "2024.01.01D12:30:45.123456789");
496496

497+
// Timestamp -> date: extract date part
498+
TEST_ASSERT_EQ("(as 'date 2024.06.15D10:30:00.000000000)", "2024.06.15");
499+
TEST_ASSERT_EQ("(as 'date 2000.01.01D00:00:00.000000000)", "2000.01.01");
500+
501+
// Timestamp -> time: extract time part
502+
TEST_ASSERT_EQ("(as 'time 2024.06.15D10:30:00.000000000)", "10:30:00.000");
503+
TEST_ASSERT_EQ("(as 'time 2024.06.15D23:59:59.999000000)", "23:59:59.999");
504+
505+
// Date -> timestamp: date at midnight
506+
TEST_ASSERT_EQ("(as 'timestamp 2024.06.15)", "2024.06.15D00:00:00.000000000");
507+
TEST_ASSERT_EQ("(as 'timestamp 2000.01.01)", "2000.01.01D00:00:00.000000000");
508+
509+
// Time -> timestamp: epoch date + time
510+
TEST_ASSERT_EQ("(as 'timestamp 10:30:00.000)", "2000.01.01D10:30:00.000000000");
511+
512+
// Roundtrip: timestamp -> date -> timestamp (loses time part)
513+
TEST_ASSERT_EQ("(as 'timestamp (as 'date 2024.06.15D10:30:00.000000000))", "2024.06.15D00:00:00.000000000");
514+
515+
// Vector: timestamp -> date
516+
TEST_ASSERT_EQ("(as 'date [2024.06.15D10:30:00.000000000 2024.12.25D23:59:59.000000000])", "[2024.06.15 2024.12.25]");
517+
518+
// Vector: date -> timestamp
519+
TEST_ASSERT_EQ("(as 'timestamp [2024.06.15 2024.12.25])", "[2024.06.15D00:00:00.000000000 2024.12.25D00:00:00.000000000]");
520+
521+
// Vector: timestamp -> time
522+
TEST_ASSERT_EQ("(as 'time [2024.06.15D10:30:00.000000000 2024.12.25D23:59:59.999000000])", "[10:30:00.000 23:59:59.999]");
523+
497524
PASS();
498525
}
499526

tests/cmp_logic.c

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -621,9 +621,20 @@ test_result_t test_cmp_temporal() {
621621
TEST_ASSERT_EQ("(< [2024.01.01D10:00:00.000000000] [2024.01.01D10:00:01.000000000])", "[true]");
622622

623623
// Cross-temporal comparisons (date vs timestamp)
624+
// LT/GT: date promoted to midnight timestamp (precise ordering)
624625
TEST_ASSERT_EQ("(< 2024.01.01 2024.01.01D10:00:00.000000000)", "true");
625626
TEST_ASSERT_EQ("(> 2024.01.01D10:00:00.000000000 2024.01.01)", "true");
626627
TEST_ASSERT_EQ("(< [2024.01.01] [2024.01.01D10:00:00.000000000])", "[true]");
628+
// EQ/NE: timestamp truncated to date (same-day semantics)
629+
TEST_ASSERT_EQ("(== 2024.01.01D10:30:00.000000000 2024.01.01)", "true");
630+
TEST_ASSERT_EQ("(== 2024.01.01D23:59:59.999999999 2024.01.01)", "true");
631+
TEST_ASSERT_EQ("(== 2024.01.01D00:00:00.000000000 2024.01.01)", "true");
632+
TEST_ASSERT_EQ("(== 2024.01.02D00:00:00.000000000 2024.01.01)", "false");
633+
TEST_ASSERT_EQ("(!= 2024.01.01D10:30:00.000000000 2024.01.02)", "true");
634+
TEST_ASSERT_EQ("(== 2024.01.01 2024.01.01D10:30:00.000000000)", "true");
635+
// Vector: timestamp column filtered by date
636+
TEST_ASSERT_EQ("(== [2024.01.01D10:00:00.000000000 2024.01.02D05:00:00.000000000] 2024.01.01)", "[true false]");
637+
TEST_ASSERT_EQ("(== 2024.01.01 [2024.01.01D10:00:00.000000000 2024.01.02D05:00:00.000000000])", "[true false]");
627638

628639
PASS();
629640
}

tests/vector_ops.c

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -180,6 +180,10 @@ test_result_t test_vec_distinct() {
180180
TEST_ASSERT_EQ("(distinct (list [3i 3i] 2i [3i 3i] 2i))", "(list 2i [3i 3i])");
181181
// String distinct
182182
TEST_ASSERT_EQ("(distinct \"aabbcc\")", "\"abc\"");
183+
// F64 distinct
184+
TEST_ASSERT_EQ("(distinct [1.0 2.0 1.0 3.0])", "[1.0 2.0 3.0]");
185+
TEST_ASSERT_EQ("(distinct [1.5 1.5 1.5])", "[1.5]");
186+
TEST_ASSERT_EQ("(distinct [0.0 -0.0 1.0])", "[0.0 1.0]");
183187

184188
PASS();
185189
}

0 commit comments

Comments
 (0)