Skip to content

Commit 75633c4

Browse files
detule, shrektan, hadley, and simonpcouch
authored
Encode non-ASCII Column Names to UTF-8 (#531)
* need to convert the column names to UTF-8
* throw correctly encoded error message when it contains nanodbc error or sql
* add docs on the encoding of database and driver
* offload work to output_encoder

---------

Co-authored-by: shrektan <[email protected]>
Co-authored-by: Hadley Wickham <[email protected]>
Co-authored-by: Simon P. Couch <[email protected]>
1 parent 4156ff1 commit 75633c4

File tree

4 files changed

+26
-10
lines changed

4 files changed

+26
-10
lines changed

NEWS.md

+6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# odbc (development version)
22

3+
* odbc now always converts the encoding of non-ASCII column names of the SQL
4+
results to UTF-8. (@shrektan, #430)
5+
6+
* Fixed an issue where odbc could throw errors with garbled text when the encodings
7+
of the client and the database server differ. (@shrektan, #432)
8+
39
* dbListFields: Now works with DBI::Id and DBI::SQL identifiers.
410

511
* Transitioned `odbcDataType()` to use S4 for consistency. S3 methods defined

src/Iconv.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ using namespace Rcpp;
88
#include "Iconv.h"
99

1010
Iconv::Iconv(const std::string& from, const std::string& to) {
11-
if (from == "UTF-8") {
11+
if (from.empty() || from == to) {
1212
cd_ = NULL;
1313
} else {
1414
cd_ = Riconv_open(to.c_str(), from.c_str());

src/odbc_result.cpp

+9-7
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ void odbc_result::execute() {
5353
num_columns_ = r_->columns();
5454
} catch (const nanodbc::database_error& e) {
5555
c_->set_current_result(nullptr);
56-
throw odbc_error(e, sql_);
56+
throw odbc_error(e, sql_, output_encoder_);
5757
} catch (...) {
5858
c_->set_current_result(nullptr);
5959
throw;
@@ -462,7 +462,13 @@ std::vector<std::string> odbc_result::column_names(nanodbc::result const& r) {
462462
std::vector<std::string> names;
463463
names.reserve(num_columns_);
464464
for (short i = 0; i < num_columns_; ++i) {
465-
names.push_back(r.column_name(i));
465+
nanodbc::string_type name = r.column_name(i);
466+
// We expect column names to share the same encoding as the
467+
// data itself. Similar to the handling of string fields,
468+
// convert to UTF-8 before returning to the user (if needed)
469+
names.push_back(
470+
output_encoder_.makeString(name.c_str(), name.c_str() + name.length())
471+
);
466472
}
467473
return names;
468474
}
@@ -831,11 +837,7 @@ void odbc_result::assign_string(
831837
if (value.is_null(column)) {
832838
res = NA_STRING;
833839
} else {
834-
if (c_->encoding() != "") {
835-
res = output_encoder_.makeSEXP(str.c_str(), str.c_str() + str.length());
836-
} else { // If no encoding specified assume it is UTF-8 / ASCII
837-
res = Rf_mkCharCE(str.c_str(), CE_UTF8);
838-
}
840+
res = output_encoder_.makeSEXP(str.c_str(), str.c_str() + str.length());
839841
}
840842
}
841843
SET_STRING_ELT(out[column], row, res);

src/odbc_result.h

+10-2
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,21 @@ inline void signal_unknown_field_type(short type, const std::string& name) {
1919

2020
class odbc_error : public Rcpp::exception {
2121
public:
22-
odbc_error(const nanodbc::database_error& e, const std::string& sql)
22+
odbc_error(
23+
const nanodbc::database_error& e,
24+
const std::string& sql,
25+
Iconv& output_encoder)
2326
: Rcpp::exception("", false) {
24-
message = std::string(e.what()) + "\n<SQL> '" + sql + "'";
27+
std::string m = std::string(e.what()) + "\n<SQL> '" + sql + "'";
28+
// #432: [R] expects UTF-8 encoded strings but both nanodbc and sql are
29+
// encoded in the database encoding, which may differ from UTF-8
30+
message = Rf_translateChar(
31+
output_encoder.makeSEXP(m.c_str(), m.c_str() + m.length()));
2532
}
2633
const char* what() const NANODBC_NOEXCEPT { return message.c_str(); }
2734

2835
private:
36+
// #432: must be native encoded, as R expects native encoded chars for error msg
2937
std::string message;
3038
};
3139

0 commit comments

Comments
 (0)