Skip to content

Commit 5e237e1

Browse files
committed
In progress: implement Unicode support conversion
1 parent ae93151 commit 5e237e1

4 files changed

Lines changed: 64 additions & 30 deletions

File tree

cpp/src/arrow/flight/sql/odbc/odbc_impl/config/configuration.cc

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -49,34 +49,34 @@ std::string ReadDsnString(const std::string& dsn, std::string_view key,
4949
CONVERT_WIDE_STR(const std::wstring wkey, key);
5050
CONVERT_WIDE_STR(const std::wstring wdflt, dflt);
5151

52-
// -AL- found workaround for `cannot convert 'const wchar_t*' to 'LPCWSTR' {aka
53-
// 'const short unsigned int*'}` on Linux. Notes in this file for reference only.
54-
55-
// Via CONVERT_WIDE_STR, Arrow correctly converts to UTF-32 on Unix systems,
56-
// so the conversion from wchar_t to short unsigned int* will work on Linux.
57-
58-
// -AL- I just need to wrap `reinterpret_cast<LPCWSTR>()` on all string args for
59-
// SQLGetPrivateProfileString.
52+
// -AL- next up: figure out why `buf` is always empty
53+
// (buf is default value if the default value is passed)
54+
// Have tried `.odbc.ini` but it doesn't work
55+
// DSN name is correct because isql finds it
6056

6157
#define BUFFER_SIZE (1024)
6258
std::vector<SQLWCHAR> buf(BUFFER_SIZE);
6359
int ret = SQLGetPrivateProfileString(
64-
reinterpret_cast<LPCWSTR>(wdsn.c_str()), reinterpret_cast<LPCWSTR>(wkey.c_str()),
65-
reinterpret_cast<LPCWSTR>(wdflt.c_str()), buf.data(), static_cast<int>(buf.size()),
66-
reinterpret_cast<LPCWSTR>(L"ODBC.INI"));
60+
reinterpret_cast<LPCWSTR>(GET_SQWCHAR_PTR(wdsn)),
61+
reinterpret_cast<LPCWSTR>(GET_SQWCHAR_PTR(wkey)),
62+
reinterpret_cast<LPCWSTR>(GET_SQWCHAR_PTR(wdflt)), buf.data(),
63+
static_cast<int>(buf.size()),
64+
reinterpret_cast<LPCWSTR>(GET_SQWCHAR_PTR(std::wstring(L"ODBC.INI"))));
6765

6866
if (ret > BUFFER_SIZE) {
6967
// If there wasn't enough space, try again with the right size buffer.
7068
buf.resize(ret + 1);
7169
ret = SQLGetPrivateProfileString(
72-
reinterpret_cast<LPCWSTR>(wdsn.c_str()), reinterpret_cast<LPCWSTR>(wkey.c_str()),
73-
reinterpret_cast<LPCWSTR>(wdflt.c_str()), buf.data(),
74-
static_cast<int>(buf.size()), reinterpret_cast<LPCWSTR>(L"ODBC.INI"));
70+
reinterpret_cast<LPCWSTR>(GET_SQWCHAR_PTR(wdsn)),
71+
reinterpret_cast<LPCWSTR>(GET_SQWCHAR_PTR(wkey)),
72+
reinterpret_cast<LPCWSTR>(GET_SQWCHAR_PTR(wdflt)), buf.data(),
73+
static_cast<int>(buf.size()),
74+
reinterpret_cast<LPCWSTR>(GET_SQWCHAR_PTR(std::wstring(L"ODBC.INI"))));
7575
}
7676

7777
std::string result("");
7878
ARROW_LOG(DEBUG) << "-AL- ReadDsnString key: " << key;
79-
ARROW_LOG(DEBUG) << "-AL- ReadDsnString result before: " << result;
79+
ARROW_LOG(DEBUG) << "-AL- ReadDsnString result before: (should be empty) " << result;
8080
SetAttributeSQLWCHAR(buf.data(), ret * GetSqlWCharSize(), result);
8181
ARROW_LOG(DEBUG) << "-AL- ReadDsnString result: " << result;
8282
ARROW_LOG(DEBUG) << "-AL- ReadDsnString ret: " << ret;
@@ -102,15 +102,17 @@ std::vector<std::string> ReadAllKeys(const std::string& dsn) {
102102
std::vector<SQLWCHAR> buf(BUFFER_SIZE);
103103

104104
int ret = SQLGetPrivateProfileString(
105-
reinterpret_cast<LPCWSTR>(wdsn.c_str()), NULL, reinterpret_cast<LPCWSTR>(L""),
106-
buf.data(), static_cast<int>(buf.size()), reinterpret_cast<LPCWSTR>(L"ODBC.INI"));
105+
reinterpret_cast<LPCWSTR>(GET_SQWCHAR_PTR(wdsn)), NULL,
106+
reinterpret_cast<LPCWSTR>(L""), buf.data(), static_cast<int>(buf.size()),
107+
reinterpret_cast<LPCWSTR>(GET_SQWCHAR_PTR(std::wstring(L"ODBC.INI"))));
107108

108109
if (ret > BUFFER_SIZE) {
109110
// If there wasn't enough space, try again with the right size buffer.
110111
buf.resize(ret + 1);
111112
ret = SQLGetPrivateProfileString(
112-
reinterpret_cast<LPCWSTR>(wdsn.c_str()), NULL, reinterpret_cast<LPCWSTR>(L""),
113-
buf.data(), static_cast<int>(buf.size()), reinterpret_cast<LPCWSTR>(L"ODBC.INI"));
113+
reinterpret_cast<LPCWSTR>(GET_SQWCHAR_PTR(wdsn)), NULL,
114+
reinterpret_cast<LPCWSTR>(L""), buf.data(), static_cast<int>(buf.size()),
115+
reinterpret_cast<LPCWSTR>(GET_SQWCHAR_PTR(std::wstring(L"ODBC.INI"))));
114116
}
115117

116118
// When you pass NULL to SQLGetPrivateProfileString it gives back a \0 delimited list of

cpp/src/arrow/flight/sql/odbc/odbc_impl/encoding_utils.h

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,26 @@
2828
#include <memory>
2929
#include <string>
3030

31+
// Workaround for ODBC `BOOL` def conflict on Linux
32+
#ifdef __linux__
33+
# ifdef BOOL
34+
# undef BOOL
35+
# endif // BOOL
36+
#endif // __linux__
37+
// Include fwd.h headers after ODBC headers
38+
#include "arrow/flight/sql/odbc/odbc_impl/util.h"
39+
40+
#include "arrow/util/logging.h" // -AL- TEMP
41+
3142
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
3243

44+
// GET_SQWCHAR_PTR(wstring_var): yields a pointer to the characters of a
// std::wstring for use as a wide-string argument to ODBC calls.
//
// On Linux it expands to ODBC::ToSqlWCharVector(wstring_var).data(), i.e. a
// pointer into a temporary std::vector<SQLWCHAR>. That temporary is destroyed
// at the end of the enclosing full-expression, so the macro is only safe when
// used directly as a function-call argument; never store the pointer.
// NOTE(review): ToSqlWCharVector builds the vector from the string's
// [begin, end) range, so the buffer does not appear to be null-terminated;
// confirm whether callers that expect a C string need a terminator appended.
//
// On other platforms it expands to wstring_var.c_str().
#ifdef __linux__
45+
# define GET_SQWCHAR_PTR(wstring_var) (ODBC::ToSqlWCharVector(wstring_var).data())
46+
#else
47+
// Windows and macOS
48+
# define GET_SQWCHAR_PTR(wstring_var) (wstring_var.c_str())
49+
#endif
50+
3351
namespace ODBC {
3452
using arrow::flight::sql::odbc::DriverException;
3553
using arrow::flight::sql::odbc::GetSqlWCharSize;
@@ -118,11 +136,25 @@ inline std::string SqlStringToString(const unsigned char* sql_str,
118136
return res;
119137
}
120138

139+
// On Linux, unixodbc defines SQLWCHAR as `unsigned short`
//
// Copies the code units of `ws` into a std::vector<SQLWCHAR>.
// - If SQLWCHAR and wchar_t have the same size, each wchar_t is copied
//   element-by-element into the vector.
// - Otherwise `ws` is converted to a UTF-8 std::string and then to a
//   std::u16string, whose code units are copied into the vector.
// NOTE(review): both branches construct the vector from a [begin, end) range,
// so the result carries no terminating null element. GET_SQWCHAR_PTR hands
// .data() of this vector to APIs that take string pointers -- confirm whether
// a trailing 0 must be appended for those call sites.
121140
inline std::vector<SQLWCHAR> ToSqlWCharVector(const std::wstring& ws) {
122-
std::vector<SQLWCHAR> buf;
123-
// buf.assign(ws.begin(), ws.end());
124-
// TODO implement in separate PR
125-
return buf;
141+
// Temporary diagnostics: log the character-type sizes on this platform.
ARROW_LOG(DEBUG) << "-AL- sizeof(SQLWCHAR):" << sizeof(SQLWCHAR)
142+
<< ", sizeof(wchar_t):" << sizeof(wchar_t);
143+
144+
ARROW_LOG(DEBUG) << "-AL- sizeof(char16_t):" << sizeof(char16_t)
145+
<< ", sizeof(char32_t):" << sizeof(char32_t);
146+
// Same element width: a plain element-wise copy is used, no transcoding.
if (sizeof(SQLWCHAR) == sizeof(wchar_t)) {
147+
ARROW_LOG(DEBUG) << "-AL- sizeof(SQLWCHAR) equals sizeof(wchar_t) ";
148+
return std::vector<SQLWCHAR>(ws.begin(), ws.end());
149+
} else {
150+
ARROW_LOG(DEBUG) << "-AL- sizeof(SQLWCHAR) != doesn't equal sizeof(wchar_t) ";
151+
// Different widths: go wide string -> UTF-8 -> UTF-16.
// CONVERT_UTF8_STR is defined outside this view; presumably it performs the
// wide-to-UTF-8 step -- confirm against its definition.
CONVERT_UTF8_STR(const std::string utf8s, ws);
152+
CONVERT_UTF16_STR(const std::u16string utf16s, utf8s);
153+
// std::string WideStringToUTF8(input);
154+
// std::u16string u16s = UTF8StringToUTF16();
155+
156+
// Copy the UTF-16 code units into SQLWCHAR elements.
return std::vector<SQLWCHAR>(utf16s.begin(), utf16s.end());
157+
}
126158
}
127159

128160
} // namespace ODBC

cpp/src/arrow/flight/sql/odbc/odbc_impl/system_dsn.cc

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,6 @@
2525

2626
#include "arrow/flight/sql/odbc/odbc_impl/encoding_utils.h"
2727

28-
#ifdef __linux__
29-
# define GET_SQWCHAR_PTR(wstring_var) (ODBC::ToSqlWCharVector(wstring_var).data())
30-
#else
31-
// Windows and macOS
32-
# define GET_SQWCHAR_PTR(wstring_var) (wstring_var.c_str())
33-
#endif
34-
3528
namespace arrow::flight::sql::odbc {
3629

3730
using config::Configuration;

cpp/src/arrow/flight/sql/odbc/odbc_impl/util.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,13 @@
4242
return res.ValueOrDie(); \
4343
}()
4444

45+
// CONVERT_UTF16_STR(utf16string_var, utf8_target)
// Initializes `utf16string_var` with the UTF-16 form of `utf8_target`.
// `utf16string_var` is the left-hand side of the generated assignment, so a
// full declaration may be passed, e.g.
//   CONVERT_UTF16_STR(const std::u16string utf16s, utf8s);
// The conversion (arrow::util::UTF8StringToUTF16) runs inside an
// immediately-invoked lambda that captures by reference. Its status is handed
// to util::ThrowIfNotOK before the value is extracted; presumably that throws
// on a failed conversion -- confirm against ThrowIfNotOK.
#define CONVERT_UTF16_STR(utf16string_var, utf8_target) \
46+
utf16string_var = [&] { \
47+
arrow::Result<std::u16string> res = arrow::util::UTF8StringToUTF16(utf8_target); \
48+
arrow::flight::sql::odbc::util::ThrowIfNotOK(res.status()); \
49+
return res.ValueOrDie(); \
50+
}()
51+
4552
namespace arrow::flight::sql::odbc {
4653
namespace util {
4754

0 commit comments

Comments
 (0)