From c70e4f764c1a88f5cced93d00430632fd3575281 Mon Sep 17 00:00:00 2001
From: venom1204 <venomplays1204@gmail.com>
Date: Thu, 18 Jun 2026 20:17:13 +0000
Subject: [PATCH 1/3] vignettes: document x. prefix for match detection and rolling-join staleness

---
 vignettes/datatable-joins.Rmd | 43 ++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/vignettes/datatable-joins.Rmd b/vignettes/datatable-joins.Rmd
index d8581eb7a9..32a823d79f 100644
--- a/vignettes/datatable-joins.Rmd
+++ b/vignettes/datatable-joins.Rmd
@@ -226,7 +226,23 @@ Products[
         total_value = price * count)
 ]
 ```
+#### 3.1.4. Identifying matches in key-only tables
 
+When joining a table `i` to a "lookup" table `x` that contains only keys (that is, `x[i, on = ...]`), the join column in the result takes its values from `i`, so matched and unmatched rows look alike. To explicitly check whether a match was found in `x`, refer to the column with the `x.` prefix: if `x.col` is `NA`, no match was found.
+
+```{r}
+# Lookup table of authorized IDs
+authorized_ids = data.table(user_id = c(1L, 2L, 5L), key = "user_id")
+# New login attempts
+logins = data.table(user_id = c(1L, 3L, 5L))
+
+# By selecting x.user_id, we can identify which logins exist in the authorized table
+authorized_ids[logins, on = .(user_id), .(user_id, is_authorized = !is.na(x.user_id))]
+#    user_id is_authorized
+# 1:       1          TRUE
+# 2:       3         FALSE
+# 3:       5          TRUE
+```
 
 ##### Summarizing with `on` in `data.table`
 
@@ -253,7 +269,7 @@ dt2 = ProductReceived[
 identical(dt1, dt2)
 ```
 
-#### 3.1.4. Joining based on several columns
+#### 3.1.5. Joining based on several columns
 
 So far we have just joined `data.table`s based on 1 column, but it's important to know that the package can join tables matching several columns.
 
@@ -629,6 +645,31 @@ ProductPriceHistory[ProductSales,
                     j = .(product_id, date, count, price)]
 ```
 
+### 5.1. Calculating Staleness (Join Distance)
+
+In rolling joins, `data.table` matches each row of `i` to the closest available record in `x` in the rolling direction (with `roll = TRUE`, the most recent preceding record). By default, the join column in the result displays the value from the `i` table (the time you "queried"). To see the actual time of the record that was found in `x`, use the `x.` prefix. The difference between these two values is often called "staleness".
+
+```{r}
+# Prices updated at specific times
+prices = data.table(
+  time = as.ITime(c("10:00:00", "10:05:00", "10:10:00")),
+  price = c(100, 105, 110),
+  key = "time"
+)
+
+# A trade happens at 10:07:00
+trade = data.table(time = as.ITime("10:07:00"))
+
+# Using x.time to see the actual record time found
+prices[trade, on = .(time), roll = TRUE, 
+       .(queried_time = time, 
+         actual_time = x.time, 
+         price, 
+         staleness = time - x.time)]
+#    queried_time actual_time price staleness
+# 1:     10:07:00    10:05:00   105  00:02:00
+```
+
 ## 6. Taking advantage of joining speed
 
 ### 6.1. Subsets as joins

From 91390220b40d2dc93dafade82af59de9b422eb52 Mon Sep 17 00:00:00 2001
From: venom1204 <venomplays1204@gmail.com>
Date: Fri, 19 Jun 2026 19:29:48 +0000
Subject: [PATCH 2/3] vignettes: use character `on` and i. prefix in new join examples, drop hard-coded output

---
 vignettes/datatable-joins.Rmd | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/vignettes/datatable-joins.Rmd b/vignettes/datatable-joins.Rmd
index 32a823d79f..60b0baf200 100644
--- a/vignettes/datatable-joins.Rmd
+++ b/vignettes/datatable-joins.Rmd
@@ -236,12 +236,10 @@ authorized_ids = data.table(user_id = c(1L, 2L, 5L), key = "user_id")
 # New login attempts
 logins = data.table(user_id = c(1L, 3L, 5L))
 
-# By selecting x.user_id, we can identify which logins exist in the authorized table
-authorized_ids[logins, on = .(user_id), .(user_id, is_authorized = !is.na(x.user_id))]
-#    user_id is_authorized
-# 1:       1          TRUE
-# 2:       3         FALSE
-# 3:       5          TRUE
+# Use "user_id" as a string in the 'on' argument
+authorized_ids[logins, on = "user_id", 
+               .(user_id = i.user_id, 
+                 is_authorized = !is.na(x.user_id))]
 ```
 
 ##### Summarizing with `on` in `data.table`
@@ -661,13 +659,12 @@ prices = data.table(
 trade = data.table(time = as.ITime("10:07:00"))
 
 # Using x.time to see the actual record time found
-prices[trade, on = .(time), roll = TRUE, 
-       .(queried_time = time, 
+# Use "time" as a string to avoid conflict with base::time
+prices[trade, on = "time", roll = TRUE, 
+       .(queried_time = i.time, 
          actual_time = x.time, 
          price, 
-         staleness = time - x.time)]
-#    queried_time actual_time price staleness
-# 1:     10:07:00    10:05:00   105  00:02:00
+         staleness = i.time - x.time)]
 ```
 
 ## 6. Taking advantage of joining speed

From a7dd97800f31a87bdcc9b3e139a55b169bcf6a67 Mon Sep 17 00:00:00 2001
From: venom1204 <venomplays1204@gmail.com>
Date: Fri, 19 Jun 2026 19:34:45 +0000
Subject: [PATCH 3/3] vignettes: use unprefixed join columns in new join examples

---
 vignettes/datatable-joins.Rmd | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/vignettes/datatable-joins.Rmd b/vignettes/datatable-joins.Rmd
index 60b0baf200..14f080b9c8 100644
--- a/vignettes/datatable-joins.Rmd
+++ b/vignettes/datatable-joins.Rmd
@@ -236,10 +236,8 @@ authorized_ids = data.table(user_id = c(1L, 2L, 5L), key = "user_id")
 # New login attempts
 logins = data.table(user_id = c(1L, 3L, 5L))
 
-# Use "user_id" as a string in the 'on' argument
-authorized_ids[logins, on = "user_id", 
-               .(user_id = i.user_id, 
-                 is_authorized = !is.na(x.user_id))]
+# By selecting x.user_id, we can identify which logins exist in the authorized table
+authorized_ids[logins, on = "user_id", .(user_id, is_authorized = !is.na(x.user_id))]
 ```
 
 ##### Summarizing with `on` in `data.table`
@@ -649,6 +647,7 @@ In rolling joins, `data.table` matches to the nearest available record. By defau
 
 ```{r}
 # Prices updated at specific times
+# (`time` is the key column, so it drives the rolling join below)
 prices = data.table(
   time = as.ITime(c("10:00:00", "10:05:00", "10:10:00")),
   price = c(100, 105, 110),
@@ -659,12 +658,11 @@ prices = data.table(
 trade = data.table(time = as.ITime("10:07:00"))
 
 # Using x.time to see the actual record time found
-# Use "time" as a string to avoid conflict with base::time
-prices[trade, on = "time", roll = TRUE, 
-       .(queried_time = i.time, 
+prices[trade, on = .(time), roll = TRUE, 
+       .(queried_time = time, 
          actual_time = x.time, 
          price, 
-         staleness = i.time - x.time)]
+         staleness = time - x.time)]
 ```
 
 ## 6. Taking advantage of joining speed