Skip to content

Commit 11bb066

Browse files
authored
Merge pull request #471 from Ghabry/no-iconv
Remove Iconv support
2 parents 272daa5 + d94eea3 commit 11bb066

File tree

8 files changed

+122
-325
lines changed

8 files changed

+122
-325
lines changed

CMakeLists.txt

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ project(liblcf VERSION 0.8 LANGUAGES CXX)
88

99
# Compilation options
1010
option(BUILD_SHARED_LIBS "Build shared library, disable for building the static library (default: ON)" ON)
11-
option(LIBLCF_WITH_ICU "ICU encoding detection (when OFF fallback to iconv, not recommended, default: ON)" ON)
11+
option(LIBLCF_WITH_ICU "ICU encoding handling (when disabled only windows-1252 is supported, default: ON)" ON)
1212
option(LIBLCF_WITH_XML "XML reading support (expat, default: ON)" ON)
1313
option(LIBLCF_UPDATE_MIMEDB "Whether to run update-mime-database after install (default: ON)" ON)
1414
option(LIBLCF_ENABLE_TOOLS "Whether to build the tools (default: ON)" ON)
@@ -343,16 +343,13 @@ set_property(TARGET lcf PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON)
343343
# Name of the exported library
344344
set_property(TARGET lcf PROPERTY EXPORT_NAME liblcf)
345345

346-
# icu or fallback to iconv
346+
# icu
347347
set(LCF_SUPPORT_ICU 0)
348348
if(LIBLCF_WITH_ICU)
349349
find_package(ICU COMPONENTS i18n uc data REQUIRED)
350350
target_link_libraries(lcf ICU::i18n ICU::uc ICU::data)
351351
list(APPEND LIBLCF_DEPS "icu-i18n")
352352
set(LCF_SUPPORT_ICU 1)
353-
else()
354-
find_package(Iconv REQUIRED)
355-
target_link_libraries(lcf Iconv::Iconv)
356353
endif()
357354

358355
# expat

builds/cmake/Modules/FindIconv.cmake

Lines changed: 0 additions & 133 deletions
This file was deleted.

builds/cmake/liblcf-config.cmake.in

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ include(CMakeFindDependencyMacro)
44

55
if(@LCF_SUPPORT_ICU@)
66
find_dependency(ICU COMPONENTS i18n uc data REQUIRED)
7-
else()
8-
find_dependency(Iconv REQUIRED)
97
endif()
108

119
if(@LCF_SUPPORT_XML@)

configure.ac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ AM_CONDITIONAL(CROSS_COMPILING,[test "x$cross_compiling" = "xyes"])
2222

2323
# Checks for libraries.
2424
AC_SUBST([LCF_SUPPORT_ICU],[0])
25-
AC_ARG_ENABLE([icu],[AS_HELP_STRING([--disable-icu],[Disable ICU encoding detection (fallback to iconv) [default=no]])])
25+
AC_ARG_ENABLE([icu],[AS_HELP_STRING([--disable-icu],[Disable ICU encoding handling (only windows-1252 supported) [default=no]])])
2626
AS_IF([test "x$enable_icu" != "xno"],[
2727
AX_PKG_CHECK_MODULES([ICU],[],[icu-i18n],[LCF_SUPPORT_ICU=1])
2828
])

src/encoder.cpp

Lines changed: 75 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -13,23 +13,17 @@
1313
#include "lcf/scope_guard.h"
1414
#include <cstdio>
1515
#include <cstdlib>
16-
#include <exception>
1716

1817
#if LCF_SUPPORT_ICU
1918
# include <unicode/ucsdet.h>
2019
# include <unicode/ucnv.h>
2120
#else
22-
# ifdef _MSC_VER
23-
# error MSVC builds require ICU
24-
# endif
21+
# include <cstdint>
2522
#endif
2623

2724
#ifdef _WIN32
2825
# include <windows.h>
2926
#else
30-
# if !LCF_SUPPORT_ICU
31-
# include <iconv.h>
32-
# endif
3327
# include <locale>
3428
#endif
3529

@@ -82,12 +76,12 @@ void Encoder::Init() {
8276
return;
8377
}
8478

85-
#if LCF_SUPPORT_ICU
8679
auto code_page = atoi(_encoding.c_str());
8780
const auto& storage_encoding = code_page > 0
8881
? ReaderUtil::CodepageToEncoding(code_page)
8982
: _encoding;
9083

84+
#if LCF_SUPPORT_ICU
9185
auto status = U_ZERO_ERROR;
9286
constexpr auto runtime_encoding = "UTF-8";
9387
auto conv_runtime = ucnv_open(runtime_encoding, &status);
@@ -111,26 +105,30 @@ void Encoder::Init() {
111105
_conv_runtime = conv_runtime;
112106
_conv_storage = conv_storage;
113107
#else
114-
_conv_runtime = const_cast<char*>("UTF-8");
115-
_conv_storage = const_cast<char*>(_encoding.c_str());
108+
if (storage_encoding != "windows-1252") {
109+
return;
110+
}
111+
112+
_conv_runtime = 65001;
113+
_conv_storage = 1252;
116114
#endif
117115
}
118116

119-
void Encoder::Reset() {
120117
#if LCF_SUPPORT_ICU
121-
auto* conv = reinterpret_cast<UConverter*>(_conv_runtime);
122-
if (conv) ucnv_close(conv);
123-
conv = reinterpret_cast<UConverter*>(_conv_storage);
124-
if (conv) ucnv_close(conv);
125-
#endif
126-
}
118+
void Encoder::Reset() {
119+
if (_conv_runtime) {
120+
ucnv_close(_conv_runtime);
121+
_conv_runtime = nullptr;
122+
}
127123

124+
if (_conv_storage) {
125+
ucnv_close(_conv_storage);
126+
_conv_storage = nullptr;
127+
}
128+
}
128129

129-
void Encoder::Convert(std::string& str, void* conv_dst_void, void* conv_src_void) {
130-
#if LCF_SUPPORT_ICU
130+
void Encoder::Convert(std::string& str, UConverter* conv_dst, UConverter* conv_src) {
131131
const auto& src = str;
132-
auto* conv_dst = reinterpret_cast<UConverter*>(conv_dst_void);
133-
auto* conv_src = reinterpret_cast<UConverter*>(conv_src_void);
134132

135133
auto status = U_ZERO_ERROR;
136134
_buffer.resize(src.size() * 4);
@@ -151,36 +149,65 @@ void Encoder::Convert(std::string& str, void* conv_dst_void, void* conv_src_void
151149
}
152150

153151
str.assign(_buffer.data(), dst_p);
154-
return;
152+
}
155153
#else
156-
auto* conv_dst = reinterpret_cast<const char*>(conv_dst_void);
157-
auto* conv_src = reinterpret_cast<const char*>(conv_src_void);
158-
iconv_t cd = iconv_open(conv_dst, conv_src);
159-
if (cd == (iconv_t)-1)
160-
return;
161-
char *src = &str.front();
162-
size_t src_left = str.size();
163-
size_t dst_size = str.size() * 5 + 10;
164-
_buffer.resize(dst_size);
165-
char *dst = _buffer.data();
166-
size_t dst_left = dst_size;
167-
# ifdef ICONV_CONST
168-
char ICONV_CONST *p = src;
169-
# else
170-
char *p = src;
171-
# endif
172-
char *q = dst;
173-
size_t status = iconv(cd, &p, &src_left, &q, &dst_left);
174-
iconv_close(cd);
175-
if (status == (size_t) -1 || src_left > 0) {
176-
str.clear();
154+
namespace {

// Unicode code points for the windows-1252 bytes 0x80..0x9F (index = byte - 0x80).
// The five bytes the code page leaves unassigned (0x81, 0x8D, 0x8F, 0x90, 0x9D)
// are mapped to the C1 control character with the same numeric value so that
// every byte survives a round trip.
constexpr uint16_t cp1252_high_table[32] = {
	0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
	0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
	0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
	0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
};

// Maps one windows-1252 byte to its Unicode code point.
uint32_t Cp1252ToCodepoint(unsigned char ch) {
	if (ch >= 0x80 && ch <= 0x9F) {
		return cp1252_high_table[ch - 0x80];
	}
	// 0x00..0x7F and 0xA0..0xFF are identical to Unicode (Latin-1 range)
	return ch;
}

// Maps a Unicode code point to its windows-1252 byte, or '?' when the
// code point has no representation in that code page.
char CodepointToCp1252(uint32_t codepoint) {
	if (codepoint < 0x80 || (codepoint >= 0xA0 && codepoint <= 0xFF)) {
		return static_cast<char>(codepoint);
	}

	for (size_t i = 0; i < 32; ++i) {
		if (cp1252_high_table[i] == codepoint) {
			return static_cast<char>(0x80 + i);
		}
	}

	return '?';
}

} // namespace

/**
 * Converts str in place between windows-1252 and UTF-8 (build without ICU).
 *
 * @param str text to convert; replaced by the converted text.
 *            An empty string is left untouched.
 * @param conv_dst target code page: 65001 converts windows-1252 to UTF-8,
 *                 any other value converts UTF-8 to windows-1252.
 *
 * The unnamed third parameter (source code page) is ignored because the
 * target alone determines the direction.
 *
 * When converting to windows-1252, code points without a representation and
 * malformed UTF-8 (stray continuation bytes, invalid lead bytes, truncated
 * sequences) each produce a single '?'.
 */
void Encoder::Convert(std::string& str, int conv_dst, int) {
	if (str.empty()) {
		return;
	}

	size_t buf_idx = 0;

	if (conv_dst == 65001) {
		// From 1252 to UTF-8
		// Worst case is 3 output bytes per input byte (e.g. 0x80 -> U+20AC)
		_buffer.resize(str.size() * 3 + 1);

		for (unsigned char ch: str) {
			const uint32_t codepoint = Cp1252ToCodepoint(ch);

			if (codepoint < 0x80) {
				_buffer[buf_idx++] = static_cast<char>(codepoint);
			} else if (codepoint < 0x800) {
				_buffer[buf_idx++] = static_cast<char>(0xC0 | (codepoint >> 6));
				_buffer[buf_idx++] = static_cast<char>(0x80 | (codepoint & 0x3F));
			} else {
				_buffer[buf_idx++] = static_cast<char>(0xE0 | (codepoint >> 12));
				_buffer[buf_idx++] = static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
				_buffer[buf_idx++] = static_cast<char>(0x80 | (codepoint & 0x3F));
			}
		}
	} else {
		// From UTF-8 to 1252
		// Every output byte consumes at least one input byte, so the
		// result never exceeds the input length
		_buffer.resize(str.size() + 1);

		size_t str_idx = 0;
		while (str_idx < str.size()) {
			const unsigned char lead = static_cast<unsigned char>(str[str_idx]);
			++str_idx;

			uint32_t codepoint = 0;
			int trailing = 0;

			if (lead <= 0x7F) {
				codepoint = lead;
			} else if (lead >= 0xC0 && lead <= 0xDF) {
				codepoint = lead & 0x1F;
				trailing = 1;
			} else if (lead >= 0xE0 && lead <= 0xEF) {
				codepoint = lead & 0x0F;
				trailing = 2;
			} else if (lead >= 0xF0 && lead <= 0xF7) {
				codepoint = lead & 0x07;
				trailing = 3;
			} else {
				// Stray continuation byte or invalid lead byte
				_buffer[buf_idx++] = '?';
				continue;
			}

			bool valid = true;
			for (; trailing > 0; --trailing) {
				if (str_idx >= str.size()) {
					// Sequence is cut off by the end of the string
					valid = false;
					break;
				}

				const unsigned char next = static_cast<unsigned char>(str[str_idx]);
				if ((next & 0xC0) != 0x80) {
					// Not a continuation byte: leave it for the next
					// iteration so it is decoded as its own character
					valid = false;
					break;
				}

				codepoint = (codepoint << 6) | (next & 0x3F);
				++str_idx;
			}

			_buffer[buf_idx++] = valid ? CodepointToCp1252(codepoint) : '?';
		}
	}

	str.assign(_buffer.data(), buf_idx);
}
211+
#endif
184212

185213
} //namespace lcf
186-

0 commit comments

Comments
 (0)