-
Notifications
You must be signed in to change notification settings - Fork 4.1k
Expand file tree
/
Copy patharrow_cpp11.h
More file actions
489 lines (404 loc) · 14.1 KB
/
arrow_cpp11.h
File metadata and controls
489 lines (404 loc) · 14.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <cstring> // for strlen
#include <limits>
#include <memory>
#include <utility>
#include <vector>
#undef Free
#include <cpp11.hpp>
#include "./nameof.h"
// Simple dcheck that doesn't use assert (i.e., won't crash the R session)
// Condition this on our own debug flag to avoid this ending up in any CRAN
// checks.
//
// When ARROW_R_DEBUG is defined, ARROW_R_DCHECK(EXPR) evaluates EXPR and, if it
// is false, raises an R error via Rf_error() whose message contains the
// stringified expression. When ARROW_R_DEBUG is not defined the macro expands
// to nothing, so EXPR is not evaluated at all.
#if defined(ARROW_R_DEBUG)
#define ARROW_R_DCHECK(EXPR) \
do { \
if (!(EXPR)) Rf_error("Failed DCHECK: %s evaluated to false", #EXPR); \
} while (false)
#else
#define ARROW_R_DCHECK(EXPR)
#endif
// Fallback definitions used only when compiling against R older than 3.5.0.
// Each read-only accessor is defined as the corresponding mutable accessor with
// its result cast to a pointer-to-const.
// NOTE(review): DATAPTR_RO and DATAPTR are both defined in terms of
// STRING_PTR(x) here regardless of the vector type - confirm this is intended.
#if (R_VERSION < R_Version(3, 5, 0))
#define LOGICAL_RO(x) ((const int*)LOGICAL(x))
#define INTEGER_RO(x) ((const int*)INTEGER(x))
#define REAL_RO(x) ((const double*)REAL(x))
#define COMPLEX_RO(x) ((const Rcomplex*)COMPLEX(x))
#define STRING_PTR_RO(x) ((const SEXP*)STRING_PTR(x))
#define RAW_RO(x) ((const Rbyte*)RAW(x))
#define DATAPTR_RO(x) ((const void*)STRING_PTR(x))
#define DATAPTR(x) (void*)STRING_PTR(x)
#endif
// R_altrep_class_name and R_altrep_class_package don't exist before R 4.6
#if R_VERSION < R_Version(4, 6, 0)
/// Backport for older R: returns the first element (CAR) of the attributes of
/// x's ALTREP class object, or R_NilValue when `x` is not ALTREP.
inline SEXP R_altrep_class_name(SEXP x) {
  if (!ALTREP(x)) {
    return R_NilValue;
  }
  SEXP class_info = ATTRIB(ALTREP_CLASS(x));
  return CAR(class_info);
}
/// Backport for older R: returns the second element (CADR) of the attributes of
/// x's ALTREP class object, or R_NilValue when `x` is not ALTREP.
inline SEXP R_altrep_class_package(SEXP x) {
  if (!ALTREP(x)) {
    return R_NilValue;
  }
  SEXP class_info = ATTRIB(ALTREP_CLASS(x));
  return CADR(class_info);
}
#endif
namespace arrow {
namespace r {
/// Wrapper around a raw `T*` that can be built from several R representations
/// of a pointer address and converted back to an R external pointer.
///
/// The struct has no destructor: the pointee is only released when finalize()
/// is called explicitly.
template <typename T>
struct Pointer {
  /// Heap-allocates a value-initialized T.
  Pointer() : ptr_(new T()) {}

  /// Builds a Pointer from `x`, which may be one of:
  ///  - an external pointer,
  ///  - a character(1) holding the address (any base accepted by strtoull
  ///    with base 0),
  ///  - an integer64(1) holding the address,
  ///  - a raw vector of exactly sizeof(T*) bytes holding the pointer bytes,
  ///  - a double(1) holding the address.
  /// Any other input, NA_character_, or an unparseable string raises an R
  /// error through cpp11::stop().
  explicit Pointer(SEXP x) {
    if (TYPEOF(x) == EXTPTRSXP) {
      ptr_ = static_cast<T*>(R_ExternalPtrAddr(x));

    } else if (TYPEOF(x) == STRSXP && Rf_length(x) == 1) {
      // User passed a character representation of the pointer address
      SEXP char0 = STRING_ELT(x, 0);
      if (char0 == NA_STRING) {
        cpp11::stop("Can't convert NA_character_ to pointer");
      }

      const char* input_chars = CHAR(char0);
      char* endptr;
      uint64_t ptr_value = strtoull(input_chars, &endptr, 0);
      // strtoull() leaves endptr == input_chars when it consumed nothing. For
      // an empty string that is also the end of the input, so the "consumed
      // everything" test alone would accept "" as address 0. Require at least
      // one consumed character as well.
      if (endptr == input_chars || endptr != (input_chars + strlen(input_chars))) {
        cpp11::stop("Can't parse '%s' as a 64-bit integer address", input_chars);
      }

      ptr_ = reinterpret_cast<T*>(static_cast<uintptr_t>(ptr_value));

    } else if (Rf_inherits(x, "integer64") && Rf_length(x) == 1) {
      // User passed an integer64(1) of the pointer address
      // an integer64 is a REALSXP under the hood, with the bytes
      // of each double reinterpreted as an int64. This branch must stay ahead
      // of the plain REALSXP branch below.
      uint64_t ptr_value;
      memcpy(&ptr_value, REAL(x), sizeof(uint64_t));
      ptr_ = reinterpret_cast<T*>(static_cast<uintptr_t>(ptr_value));

    } else if (TYPEOF(x) == RAWSXP && Rf_length(x) == static_cast<int>(sizeof(T*))) {
      // User passed a raw(<pointer size>) with the literal bytes of the
      // pointer.
      memcpy(&ptr_, RAW(x), sizeof(T*));

    } else if (TYPEOF(x) == REALSXP && Rf_length(x) == 1) {
      // User passed a double(1) of the static-casted pointer address.
      ptr_ = reinterpret_cast<T*>(static_cast<uintptr_t>(REAL(x)[0]));

    } else {
      cpp11::stop("Can't convert input object to pointer");
    }
  }

  /// Wraps the raw pointer in a new external pointer with no tag, no protected
  /// value and no finalizer.
  inline operator SEXP() const { return R_MakeExternalPtr(ptr_, R_NilValue, R_NilValue); }

  /// Gives access to the raw pointer.
  inline operator T*() const { return ptr_; }

  /// Deletes the pointee. Must be called explicitly.
  inline void finalize() { delete ptr_; }

  T* ptr_;
};
// Minimal stand-in for a complex vector wrapper, until cpp11 has a similar
// class. It keeps the wrapped SEXP in a cpp11::sexp.
class complexs {
 public:
  using value_type = Rcomplex;

  explicit complexs(SEXP x) : data_(x) {}

  /// The wrapped R object.
  inline operator SEXP() const { return data_; }

  /// Length of the wrapped vector, as reported by XLENGTH().
  inline R_xlen_t size() const { return XLENGTH(data_); }

 private:
  cpp11::sexp data_;
};
// Helpers that call into the R API directly; they need to be called from
// inside an unwind_protect().
namespace unsafe {

/// Translates the CHARSXP `s` to UTF-8 via Rf_translateCharUTF8().
inline const char* utf8_string(SEXP s) { return Rf_translateCharUTF8(s); }

/// Byte length of `s` once translated to UTF-8; NA_STRING counts as 0.
inline R_xlen_t r_string_size(SEXP s) {
  if (s == NA_STRING) {
    return 0;
  }
  const char* translated = Rf_translateCharUTF8(s);
  return strlen(translated);
}

}  // namespace unsafe
/// Re-encodes every non-NA element of the character vector `x` as UTF-8.
///
/// A non-ALTREP `x` is modified in place and returned. An ALTREP `x` is first
/// duplicated; the duplicate is modified and returned instead. All R API calls
/// run inside cpp11::unwind_protect().
inline SEXP utf8_strings(SEXP x) {
  return cpp11::unwind_protect([&] {
    // Never write into an ALTREP vector: switch to a duplicate first and keep
    // it protected while the loop below allocates.
    const bool input_is_altrep = ALTREP(x);
    if (input_is_altrep) {
      x = PROTECT(Rf_duplicate(x));
    }

    const R_xlen_t len = XLENGTH(x);

    // Grabbing the element pointer up front materializes the data once, which
    // is fine because every element is visited anyway.
    const SEXP* elements = STRING_PTR_RO(x);

    for (R_xlen_t idx = 0; idx < len; ++idx) {
      SEXP current = elements[idx];
      if (current == NA_STRING) {
        continue;
      }
      SET_STRING_ELT(x, idx, Rf_mkCharCE(Rf_translateCharUTF8(current), CE_UTF8));
    }

    if (input_is_altrep) {
      UNPROTECT(1);
    }
    return x;
  });
}
// Static SEXP handles that are declared here and defined elsewhere.
// Within this header, `xp` is the variable looked up with Rf_findVarInFrame()
// in r6_to_pointer() and r6_reset_pointer(), and `new_` is the right-hand side
// of the `<class>$new` call built in cpp11::to_r6().
// NOTE(review): the names suggest each member is a cached R symbol - confirm
// against the definitions.
struct symbols {
static SEXP units;
static SEXP tzone;
static SEXP xp;
static SEXP dot_Internal;
static SEXP inspect;
static SEXP row_names;
static SEXP serialize_arrow_r_metadata;
static SEXP as_list;
static SEXP ptype;
static SEXP byte_width;
static SEXP list_size;
static SEXP arrow_attributes;
static SEXP new_;
static SEXP create;
static SEXP arrow;
};
// Static SEXP handles that are declared here and defined elsewhere.
// NOTE(review): the `classes_*` names suggest pre-built class attribute vectors
// and `names_metadata` a pre-built names vector - confirm against the
// definitions.
struct data {
static SEXP classes_POSIXct;
static SEXP classes_metadata_r;
static SEXP classes_vctrs_list_of;
static SEXP classes_tbl_df;
static SEXP classes_arrow_binary;
static SEXP classes_arrow_large_binary;
static SEXP classes_arrow_fixed_size_binary;
static SEXP classes_arrow_list;
static SEXP classes_arrow_large_list;
static SEXP classes_arrow_fixed_size_list;
static SEXP classes_factor;
static SEXP classes_ordered;
static SEXP names_metadata;
};
// Static SEXP handle declared here and defined elsewhere. cpp11::to_r6() uses
// `arrow` both as the frame in which the R6 class is looked up and as the
// environment in which the `<class>$new(<xp>)` call is evaluated.
struct ns {
static SEXP arrow;
};
/// Extracts the C++ pointer stored in an R6 ArrowObject.
///
/// `self` must inherit from "ArrowObject" and hold, under the symbol
/// arrow::r::symbols::xp, an external pointer with a non-null address. That
/// address is reinterpreted as `Pointer` and returned.
///
/// Raises an R error through cpp11::stop() when `self` is not an ArrowObject,
/// when the binding is missing or NULL, or when the external pointer address
/// is null.
template <typename Pointer>
Pointer r6_to_pointer(SEXP self) {
  if (!Rf_inherits(self, "ArrowObject")) {
    std::string type_name = arrow::util::nameof<
        cpp11::decay_t<typename std::remove_pointer<Pointer>::type>>();
    cpp11::stop("Invalid R object for %s, must be an ArrowObject", type_name.c_str());
  }

  SEXP xp = Rf_findVarInFrame(self, arrow::r::symbols::xp);
  // A missing binding is reported as R_UnboundValue rather than R_NilValue.
  // Reject both so that neither reaches R_ExternalPtrAddr().
  if (xp == R_NilValue || xp == R_UnboundValue) {
    cpp11::stop("Invalid: self$`.:xp:.` is NULL");
  }

  void* p = R_ExternalPtrAddr(xp);
  if (p == nullptr) {
    SEXP klass = Rf_getAttrib(self, R_ClassSymbol);
    cpp11::stop("Invalid <%s>, external pointer to null", CHAR(STRING_ELT(klass, 0)));
  }
  return reinterpret_cast<Pointer>(p);
}
/// Destroys the heap-allocated std::shared_ptr<T> referenced by the external
/// pointer stored in `r6` and resets that external pointer's address to null.
/// Does nothing when the address is already null.
template <typename T>
void r6_reset_pointer(SEXP r6) {
  SEXP xp = Rf_findVarInFrame(r6, arrow::r::symbols::xp);
  void* address = R_ExternalPtrAddr(xp);
  if (address == nullptr) {
    return;
  }
  const auto* holder = reinterpret_cast<const std::shared_ptr<T>*>(address);
  delete holder;
  R_SetExternalPtrAddr(xp, nullptr);
}
// T is either std::shared_ptr<U> or std::unique_ptr<U>
// e.g. T = std::shared_ptr<arrow::Array>
template <typename T>
class ExternalPtrInput {
public:
explicit ExternalPtrInput(SEXP self) : ptr_(r6_to_pointer<const T*>(self)) {}
operator const T&() const { return *ptr_; }
private:
const T* ptr_;
};
/// Argument adapter for an R list of R6 ArrowObjects. On construction, each
/// list element is resolved with r6_to_pointer() and the T it points to is
/// copied into an internal std::vector<T>.
template <typename T>
class VectorExternalPtrInput {
 public:
  explicit VectorExternalPtrInput(SEXP self) : vec_(XLENGTH(self)) {
    const R_xlen_t count = static_cast<R_xlen_t>(vec_.size());
    for (R_xlen_t pos = 0; pos < count; ++pos) {
      vec_[pos] = *r6_to_pointer<const T*>(VECTOR_ELT(self, pos));
    }
  }

  /// Reference to the vector built by the constructor.
  operator const std::vector<T>&() const { return vec_; }

 private:
  std::vector<T> vec_;
};
template <typename T>
class DefaultInput {
public:
explicit DefaultInput(SEXP from) : from_(from) {}
operator T() const { return cpp11::as_cpp<T>(from_); }
private:
SEXP from_;
};
/// Argument adapter for `const T&` parameters: the SEXP is converted once, in
/// the constructor, and the adapter hands out a const reference to the
/// converted object it owns.
template <typename T>
class ConstReferenceInput {
 public:
  using const_reference = const T&;

  explicit ConstReferenceInput(SEXP from) : obj_(cpp11::as_cpp<T>(from)) {}

  operator const_reference() const { return obj_; }

 private:
  T obj_;
};
// Trait that maps a C++ parameter type to the adapter class used to build that
// argument from a SEXP. `Input<P>::type` is:
//  - DefaultInput<T> for any type without a more specific match,
//  - ConstReferenceInput<decay of T> for `const T&`,
//  - ExternalPtrInput<std::shared_ptr<T>> for `const std::shared_ptr<T>&`,
//  - ExternalPtrInput<std::unique_ptr<T>> for `const std::unique_ptr<T>&`,
//  - VectorExternalPtrInput<std::shared_ptr<T>> for
//    `const std::vector<std::shared_ptr<T>>&`.
template <typename T>
struct Input {
using type = DefaultInput<T>;
};
template <typename T>
struct Input<const T&> {
using type = ConstReferenceInput<typename std::decay<T>::type>;
};
template <typename T>
struct Input<const std::shared_ptr<T>&> {
using type = ExternalPtrInput<std::shared_ptr<T>>;
};
template <typename T>
struct Input<const std::unique_ptr<T>&> {
using type = ExternalPtrInput<std::unique_ptr<T>>;
};
template <typename T>
struct Input<const std::vector<std::shared_ptr<T>>&> {
using type = VectorExternalPtrInput<std::shared_ptr<T>>;
};
template <typename Rvector, typename T, typename ToVectorElement>
Rvector to_r_vector(const std::vector<std::shared_ptr<T>>& x,
ToVectorElement&& to_element) {
R_xlen_t n = x.size();
Rvector out(n);
for (R_xlen_t i = 0; i < n; i++) {
out[i] = to_element(x[i]);
}
return out;
}
/// Converts `x` to a writable character vector by applying `to_string` to each
/// element; a thin wrapper over to_r_vector().
template <typename T, typename ToString>
cpp11::writable::strings to_r_strings(const std::vector<std::shared_ptr<T>>& x,
                                      ToString&& to_string) {
  using Strings = cpp11::writable::strings;
  return to_r_vector<Strings>(x, std::forward<ToString>(to_string));
}
/// Converts `x` to a writable list by applying `to_element` to each element.
/// The converter is wrapped in a lambda that is passed on to to_r_vector().
template <typename T, typename ToListElement>
cpp11::writable::list to_r_list(const std::vector<std::shared_ptr<T>>& x,
                                ToListElement&& to_element) {
  auto convert_one = [&](const std::shared_ptr<T>& item) { return to_element(item); };
  return to_r_vector<cpp11::writable::list>(x, convert_one);
}
// Forward declaration of the converter-less overload. Its definition appears
// further down in this header, after cpp11::to_r6() has been declared, and
// converts each element with cpp11::to_r6<T>().
template <typename T>
cpp11::writable::list to_r_list(const std::vector<std::shared_ptr<T>>& x);
/// Returns the two-element integer vector {NA_INTEGER, -n}.
inline cpp11::writable::integers short_row_names(int n) {
  cpp11::writable::integers compact{NA_INTEGER, -n};
  return compact;
}
/// Converts every element of the R list `args` to T with cpp11::as_cpp<T>()
/// and returns the results, in order, as a std::vector<T>.
template <typename T>
std::vector<T> from_r_list(cpp11::list args) {
  const R_xlen_t n = args.size();

  std::vector<T> vec;
  // The final size is known up front, so allocate once instead of growing the
  // vector repeatedly while pushing.
  vec.reserve(static_cast<typename std::vector<T>::size_type>(n));

  for (R_xlen_t i = 0; i < n; i++) {
    vec.push_back(cpp11::as_cpp<T>(args[i]));
  }
  return vec;
}
// Declared here; the definition is not part of this header.
// NOTE(review): presumably looks up the option called `name` and falls back to
// `default_` when it is not set - confirm against the definition.
bool GetBoolOption(const std::string& name, bool default_);
// A version of vctrs::vec_size() limited to the types that are
// supported at the C++ level. Record-style vectors (e.g., POSIXlt) are
// currently handled at the R level, so by the time they reach C++ they are
// just a data.frame. Long vectors are supported as well.
//
// For a data.frame the size is the length of its first column, or the length
// of its row names when it has no columns; for anything else it is the
// length of `x` itself.
static inline R_xlen_t vec_size(SEXP x) {
  if (!Rf_inherits(x, "data.frame")) {
    return Rf_xlength(x);
  }

  if (Rf_length(x) > 0) {
    return Rf_xlength(VECTOR_ELT(x, 0));
  }

  // This will expand the rownames if attr(x, "row.names") is ALTREP;
  // however, this is probably not an important performance consideration
  // since zero-column data.frames do not occur in many workflows.
  return Rf_xlength(Rf_getAttrib(x, R_RowNamesSymbol));
}
} // namespace r
} // namespace arrow
namespace cpp11 {
/// Wraps `ptr` in an instance of the arrow R6 class called `r6_class_name`.
///
/// Returns R_NilValue for a null `ptr`. Otherwise a heap-allocated copy of the
/// shared_ptr is placed in a cpp11::external_pointer and passed to
/// `<r6_class_name>$new(...)`, which is evaluated in arrow::r::ns::arrow.
/// Raises an R error through cpp11::stop() when no variable of that name
/// exists in arrow::r::ns::arrow.
template <typename T>
SEXP to_r6(const std::shared_ptr<T>& ptr, const char* r6_class_name) {
  if (ptr == nullptr) return R_NilValue;

  cpp11::external_pointer<std::shared_ptr<T>> xp(new std::shared_ptr<T>(ptr));
  SEXP r6_class = Rf_install(r6_class_name);

// R_existsVarInFrame doesn't exist before R 4.2, so we need to fall back to
// Rf_findVarInFrame3 if it is not defined.
#if R_VERSION >= R_Version(4, 2, 0)
  const bool class_found = R_existsVarInFrame(arrow::r::ns::arrow, r6_class);
#else
  const bool class_found =
      Rf_findVarInFrame3(arrow::r::ns::arrow, r6_class, FALSE) != R_UnboundValue;
#endif
  if (!class_found) {
    cpp11::stop("No arrow R6 class named '%s'", r6_class_name);
  }

  // Build the call <symbol>$new(<x>) ...
  SEXP constructor = PROTECT(Rf_lang3(R_DollarSymbol, r6_class, arrow::r::symbols::new_));
  SEXP construct_call = PROTECT(Rf_lang2(constructor, xp));

  // ... and evaluate it in arrow::
  SEXP instance = PROTECT(Rf_eval(construct_call, arrow::r::ns::arrow));

  UNPROTECT(3);
  return instance;
}
/// This trait defines a single static function which returns the name of the R6 class
/// which corresponds to T. By default, this is just the c++ class name with any
/// namespaces stripped, for example the R6 class for arrow::ipc::RecordBatchStreamReader
/// is simply named "RecordBatchStreamReader".
///
/// Some classes require specializations of this trait. For example the R6 classes which
/// wrap arrow::csv::ReadOptions and arrow::json::ReadOptions would collide if both were
/// named "ReadOptions", so they are named "CsvReadOptions" and "JsonReadOptions"
/// respectively. Other classes such as arrow::Array are base classes and the proper R6
/// class name must be derived by examining a discriminant like Array::type_id.
///
/// All specializations are located in arrow_types.h
///
/// Only the declaration lives here. The single-argument to_r6() overload calls
/// `r6_class_name<T>::get(x)` with the shared_ptr being converted.
template <typename T>
struct r6_class_name;
/// Wraps `x` in an R6 object whose class name is obtained from
/// cpp11::r6_class_name<T>::get(x). Returns R_NilValue for a null `x`.
template <typename T>
SEXP to_r6(const std::shared_ptr<T>& x) {
  if (!x) {
    return R_NilValue;
  }
  return to_r6(x, cpp11::r6_class_name<T>::get(x));
}
} // namespace cpp11
namespace arrow {
namespace r {
/// Converts a vector of shared pointers to a writable list in which each
/// element is the result of cpp11::to_r6<T>() for the corresponding pointer.
template <typename T>
cpp11::writable::list to_r_list(const std::vector<std::shared_ptr<T>>& x) {
  auto wrap_as_r6 = [&](const std::shared_ptr<T>& item) {
    return cpp11::to_r6<T>(item);
  };
  return to_r_vector<cpp11::writable::list>(x, wrap_as_r6);
}
} // namespace r
} // namespace arrow
struct r_vec_size {
explicit r_vec_size(R_xlen_t x) : value(x) {}
R_xlen_t value;
};
namespace cpp11 {
// SFINAE helper: names T when T is exactly
// std::shared_ptr<typename T::element_type>, and is ill-formed otherwise.
template <typename T>
using enable_if_shared_ptr = typename std::enable_if<
std::is_same<std::shared_ptr<typename T::element_type>, T>::value, T>::type;
// as_cpp() overload for T = std::shared_ptr<U>. `from` is resolved through
// arrow::r::ExternalPtrInput<T>, whose conversion yields a const reference to
// the shared_ptr stored behind the R6 object; that shared_ptr is then copied
// into the return value.
template <typename T>
enable_if_shared_ptr<T> as_cpp(SEXP from) {
return arrow::r::ExternalPtrInput<T>(from);
}
// as_sexp() overload for enum types: the enumerator is cast to int and the
// conversion is delegated to the as_sexp() overload selected for int.
template <typename E>
enable_if_enum<E, SEXP> as_sexp(E e) {
return as_sexp(static_cast<int>(e));
}
// as_sexp() overload for shared pointers: delegates to the single-argument
// cpp11::to_r6<T>().
template <typename T>
SEXP as_sexp(const std::shared_ptr<T>& ptr) {
return cpp11::to_r6<T>(ptr);
}
/// Converts a vector length to an R scalar: an integer when the value does not
/// exceed the largest `int`, a double otherwise.
inline SEXP as_sexp(r_vec_size size) {
  const R_xlen_t len = size.value;
  const bool fits_in_int = !(len > std::numeric_limits<int>::max());
  return fits_in_int ? Rf_ScalarInteger(static_cast<int>(len)) : Rf_ScalarReal(len);
}
} // namespace cpp11