diff --git a/LiteCore/Storage/UnicodeCollator_ICU.cc b/LiteCore/Storage/UnicodeCollator_ICU.cc index 007972931..637d2c873 100644 --- a/LiteCore/Storage/UnicodeCollator_ICU.cc +++ b/LiteCore/Storage/UnicodeCollator_ICU.cc @@ -50,6 +50,8 @@ namespace litecore { using namespace fleece; + + class ICUCollationContext : public CollationContext { public: UCollator* ucoll {nullptr}; @@ -135,6 +137,7 @@ namespace litecore { bool ContainsUTF8(fleece::slice str, fleece::slice substr, const CollationContext &ctx) { + // FIXME: This is quite slow! Call ICU instead return ContainsUTF8_Slow(str, substr, ctx); } diff --git a/LiteCore/Storage/UnicodeCollator_winapi.cc b/LiteCore/Storage/UnicodeCollator_winapi.cc deleted file mode 100644 index bc4b3976c..000000000 --- a/LiteCore/Storage/UnicodeCollator_winapi.cc +++ /dev/null @@ -1,187 +0,0 @@ -// -// UnicodeCollator_winapi.cc -// -// Copyright 2017-Present Couchbase, Inc. -// -// Use of this software is governed by the Business Source License included -// in the file licenses/BSL-Couchbase.txt. As of the Change Date specified -// in that file, in accordance with the Business Source License, use of this -// software will be governed by the Apache License, Version 2.0, included in -// the file licenses/APL2.txt. 
-// - -#include "UnicodeCollator.hh" -#include "fleece/PlatformCompat.hh" -#include "Error.hh" -#include "Logging.hh" -#include "SQLiteCpp/SQLiteCpp.h" -#include "StringUtil.hh" -#include "TempArray.hh" -#include "NumConversion.hh" -#include -#include - -#ifdef _MSC_VER - -#include - -namespace litecore { - - using namespace std; - using namespace fleece; - - // Stores Windows collation parameters for fast lookup; callback context points to this - class WinApiCollationContext : public CollationContext { - public: - LPWSTR localeName{ nullptr }; - DWORD flags; - - WinApiCollationContext(const Collation &coll) - :CollationContext(coll) - , flags(NORM_IGNOREWIDTH) - { - Assert(coll.unicodeAware); - if (!coll.caseSensitive) - flags |= LINGUISTIC_IGNORECASE; - - if (!coll.diacriticSensitive) - flags |= LINGUISTIC_IGNOREDIACRITIC; - - slice localeSlice = coll.localeName; - localeName = (LPWSTR)calloc(LOCALE_NAME_MAX_LENGTH + 1, sizeof(WCHAR)); - if (localeSlice.buf == nullptr) { - LCID lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT); - LCIDToLocaleName(lcid, localeName, LOCALE_NAME_MAX_LENGTH, 0); - } - else { - string tmp((const char*)localeSlice.buf, localeSlice.size); - replace(tmp, '_', '-'); - MultiByteToWideChar(CP_UTF8, 0, tmp.c_str(), narrow_cast(tmp.size()), localeName, LOCALE_NAME_MAX_LENGTH); - if (LocaleNameToLCID(localeName, 0) == 0) { - Warn("Unknown locale name '%.*s', using default", SPLAT(coll.localeName)); - LCID lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT); - LCIDToLocaleName(lcid, localeName, LOCALE_NAME_MAX_LENGTH, 0); - } - } - } - - ~WinApiCollationContext() { - if (localeName) - free(localeName); - } - }; - - - unique_ptr CollationContext::create(const Collation &coll) { - return make_unique(coll); - } - - - /** Full Unicode-savvy string comparison. 
*/ - static inline int compareStringsUnicode(int len1, const void * chars1, - int len2, const void * chars2, - const WinApiCollationContext &ctx) - { - LPWSTR locale = ctx.localeName; - DWORD winFlags = ctx.flags; - - // +1 these just in case they end up being the same length to not overrun the buffer - TempArray(wchars1, WCHAR, len1 + 1); - int size1 = MultiByteToWideChar(CP_UTF8, 0, (char *)chars1, len1, wchars1, len1 + 1); - while(size1 < len1 + 1) { - wchars1[size1++] = 0; - } - - TempArray(wchars2, WCHAR, len2 + 1); - int size2 = MultiByteToWideChar(CP_UTF8, 0, (char *)chars2, len2, wchars2, len2 + 1); - while(size2 < len2 + 1) { - wchars2[size2++] = 0; - } - - int result = CompareStringEx(locale, winFlags, wchars1, -1, wchars2, -1, nullptr, nullptr, 0); - if (result == 0) { - DWORD err = GetLastError(); - Warn("Failed to compare strings (Error %d), arbitrarily returning equal", err); - return 0; - } - - if (result == CSTR_LESS_THAN) { - return -1; - } - - if (result == CSTR_GREATER_THAN) { - return 1; - } - - return 0; - } - - - static int collateUnicodeCallback(void *context, - int len1, const void * chars1, - int len2, const void * chars2) - { - auto &coll = *(WinApiCollationContext*)context; - if (coll.canCompareASCII) { - int result = CompareASCII(len1, (const uint8_t*)chars1, len2, (const uint8_t*)chars2, - coll.caseSensitive); - if (result != kCompareASCIIGaveUp) - return result; - } - return compareStringsUnicode(len1, chars1, len2, chars2, coll); - } - - - int CompareUTF8(slice str1, slice str2, const Collation &coll) { - return CompareUTF8(str1, str2, WinApiCollationContext(coll)); - } - - - int CompareUTF8(slice str1, slice str2, const CollationContext &ctx) { - return collateUnicodeCallback((void*)&ctx, (int)str1.size, str1.buf, - (int)str2.size, str2.buf); - } - - - int LikeUTF8(fleece::slice str1, fleece::slice str2, const Collation &coll) { - return LikeUTF8(str1, str2, WinApiCollationContext(coll)); - } - - - bool ContainsUTF8(fleece::slice 
str, fleece::slice substr, const CollationContext &ctx) { - // FIXME: This is quite slow! Call Windows API instead - return ContainsUTF8_Slow(str, substr, ctx); - } - - - unique_ptr RegisterSQLiteUnicodeCollation(sqlite3* dbHandle, - const Collation &coll) { - unique_ptr context(new WinApiCollationContext(coll)); - int rc = sqlite3_create_collation(dbHandle, - coll.sqliteName().c_str(), - SQLITE_UTF8, - (void*)context.get(), - collateUnicodeCallback); - if (rc != SQLITE_OK) - throw SQLite::Exception(dbHandle, rc); - return context; - } - - BOOL __stdcall SupportedLocalesCallback(LPWSTR name, DWORD flags, LPARAM arg) { - auto* locales = (vector *)arg; - size_t len = wcslen(name); - TempArray(buf, char, len + 1); - buf[len] = 0; - WideCharToMultiByte(CP_UTF8, 0, name, wcslen(name), buf, (int)len, NULL, NULL); - locales->push_back(buf); - return TRUE; - } - - vector SupportedLocales() { - vector locales; - EnumSystemLocalesEx(SupportedLocalesCallback, LOCALE_ALL, (LPARAM)&locales, NULL); - return locales; - } -} - -#endif diff --git a/LiteCore/Unix/icu_shim.c b/LiteCore/Support/icu_shim.c similarity index 100% rename from LiteCore/Unix/icu_shim.c rename to LiteCore/Support/icu_shim.c diff --git a/LiteCore/Unix/icu_shim.h b/LiteCore/Support/icu_shim.h similarity index 61% rename from LiteCore/Unix/icu_shim.h rename to LiteCore/Support/icu_shim.h index 7b89ed1d7..f63546ef4 100644 --- a/LiteCore/Unix/icu_shim.h +++ b/LiteCore/Support/icu_shim.h @@ -1,12 +1,34 @@ #if LITECORE_USES_ICU +#ifdef _MSC_VER +#include +#else #include #include +#endif #ifdef __cplusplus extern "C" { #endif +#ifdef _MSC_VER + +// ICU has been part of Windows since Windows 10 version 1709, so no shim is needed: +// unlike its Linux counterpart, it is not hard-versioned + +#define lc_ucol_open ucol_open +#define lc_ucol_setAttribute ucol_setAttribute +#define lc_ucol_close ucol_close +#define lc_ucol_strcollUTF8 ucol_strcollUTF8 +#define lc_ucasemap_open ucasemap_open +#define lc_ucasemap_utf8ToUpper 
ucasemap_utf8ToUpper +#define lc_ucasemap_utf8ToLower ucasemap_utf8ToLower +#define lc_ucasemap_close ucasemap_close +#define lc_ucol_countAvailable ucol_countAvailable +#define lc_ucol_getAvailable ucol_getAvailable + +#else + UCollator* lc_ucol_open(const char* loc, UErrorCode* status); void lc_ucol_setAttribute(UCollator* coll, UColAttribute attr, UColAttributeValue value, UErrorCode* status); void lc_ucol_close(UCollator* coll); @@ -18,6 +40,8 @@ void lc_ucasemap_close(UCaseMap* csm); int32_t lc_ucol_countAvailable(void); const char* lc_ucol_getAvailable(int32_t localeIndex); +#endif + #ifdef __cplusplus } #endif diff --git a/LiteCore/tests/SQLiteFunctionsTest.cc b/LiteCore/tests/SQLiteFunctionsTest.cc index a1c893219..9ad70e747 100644 --- a/LiteCore/tests/SQLiteFunctionsTest.cc +++ b/LiteCore/tests/SQLiteFunctionsTest.cc @@ -352,7 +352,7 @@ TEST_CASE("Unicode string functions", "[Query]") { CHECK(UTF8ChangeCase("E"_sl, true) == "E"_sl); CHECK(UTF8ChangeCase("-"_sl, true) == "-"_sl); CHECK(UTF8ChangeCase("Z•rGMai2"_sl, true) == "Z•RGMAI2"_sl); -#if __APPLE__ || defined(_MSC_VER) || LITECORE_USES_ICU +#if __APPLE__ || LITECORE_USES_ICU CHECK(UTF8ChangeCase("Zérgmåī2"_sl, true) == "ZÉRGMÅĪ2"_sl); #endif CHECK(UTF8ChangeCase("😀"_sl, true) == "😀"_sl); @@ -362,7 +362,7 @@ TEST_CASE("Unicode string functions", "[Query]") { CHECK(UTF8ChangeCase("e"_sl, false) == "e"_sl); CHECK(UTF8ChangeCase("-"_sl, false) == "-"_sl); CHECK(UTF8ChangeCase("Z•rGMai2"_sl, false) == "z•rgmai2"_sl); -#if __APPLE__ || defined(_MSC_VER)|| LITECORE_USES_ICU +#if __APPLE__ || LITECORE_USES_ICU CHECK(UTF8ChangeCase("zÉRGMÅĪ2"_sl, false) == "zérgmåī2"_sl); #endif CHECK(UTF8ChangeCase("😀"_sl, false) == "😀"_sl); @@ -411,7 +411,7 @@ N_WAY_TEST_CASE_METHOD(SQLiteFunctionsTest, "SQLite fl_blob", "[Query]") { #pragma mark - COLLATION: -#if __APPLE__ || defined(_MSC_VER) || LITECORE_USES_ICU +#if __APPLE__ || LITECORE_USES_ICU TEST_CASE("Unicode collation", "[Query][Collation]") { struct {slice a; 
slice b; int result; bool caseSensitive; bool diacriticSensitive;} tests[] = { //---- First, test just ASCII: diff --git a/LiteCore/tests/cmake/platform_win.cmake b/LiteCore/tests/cmake/platform_win.cmake index 3cf5ba3f4..5b820a94d 100644 --- a/LiteCore/tests/cmake/platform_win.cmake +++ b/LiteCore/tests/cmake/platform_win.cmake @@ -4,4 +4,9 @@ function(setup_build) CppTests PRIVATE ${TOP}MSVC ) + + target_compile_definitions( + CppTests PRIVATE + -DLITECORE_USES_ICU=1 + ) endfunction() \ No newline at end of file diff --git a/cmake/platform_android.cmake b/cmake/platform_android.cmake index 625e407cf..1373be827 100644 --- a/cmake/platform_android.cmake +++ b/cmake/platform_android.cmake @@ -23,7 +23,7 @@ function(set_litecore_source) set( ${ANDROID_SSS_RESULT} ${BASE_LITECORE_FILES} - LiteCore/Unix/icu_shim.c + LiteCore/Support/icu_shim.c LiteCore/Android/getifaddrs.cc LiteCore/Android/bionic_netlink.cc PARENT_SCOPE diff --git a/cmake/platform_linux_desktop.cmake b/cmake/platform_linux_desktop.cmake index 0667ba703..99d127d47 100644 --- a/cmake/platform_linux_desktop.cmake +++ b/cmake/platform_linux_desktop.cmake @@ -62,7 +62,7 @@ function(set_litecore_source) set( ${LINUX_SSS_RESULT} ${BASE_LITECORE_FILES} - LiteCore/Unix/icu_shim.c + LiteCore/Support/icu_shim.c PARENT_SCOPE ) endfunction() diff --git a/cmake/platform_win.cmake b/cmake/platform_win.cmake index 2f65465c9..069bdfbae 100644 --- a/cmake/platform_win.cmake +++ b/cmake/platform_win.cmake @@ -15,11 +15,11 @@ function(set_litecore_source) set( ${WIN_SSS_RESULT} ${BASE_LITECORE_FILES} - LiteCore/Storage/UnicodeCollator_winapi.cc + LiteCore/Storage/UnicodeCollator_ICU.cc MSVC/mkstemp.cc MSVC/mkdtemp.cc MSVC/strlcat.c - LiteCore/Support/StringUtil_winapi.cc + LiteCore/Support/StringUtil_icu.cc Crypto/PublicKey+Windows.cc PARENT_SCOPE ) @@ -57,6 +57,7 @@ function(setup_litecore_build_win) -D_USE_MATH_DEFINES # Define math constants like PI -DWIN32 # Identify as WIN32 -DNOMINMAX # Disable min/max macros (they 
interfere with std::min and max) + -DLITECORE_USES_ICU=1 ) target_compile_definitions( @@ -84,6 +85,7 @@ function(setup_litecore_build_win) Ws2_32 ncrypt crypt32 + icu ) # Compile string literals as UTF-8,