Skip to content

Commit 037dfe6

Browse files
authored
Merge pull request #273 from fmatthew5876/opt_str
Optimize String Parsing routines
2 parents 67202d6 + d8ed54e commit 037dfe6

File tree

11 files changed

+339
-65
lines changed

11 files changed

+339
-65
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ add_library(lcf
4545
src/reader_flags.cpp
4646
src/reader_lcf.cpp
4747
src/reader_util.cpp
48+
src/encoder.cpp
4849
src/reader_xml.cpp
4950
src/rpg_fixup.cpp
5051
src/rpg_setup.cpp

Makefile.am

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ liblcf_la_SOURCES = \
4141
src/lmu_movecommand.cpp \
4242
src/lmu_reader.cpp \
4343
src/lsd_reader.cpp \
44+
src/encoder.cpp \
4445
src/reader_flags.cpp \
4546
src/reader_lcf.cpp \
4647
src/reader_util.cpp \
@@ -116,12 +117,14 @@ pkginclude_HEADERS = \
116117
src/data.h \
117118
src/ini.h \
118119
src/inireader.h \
120+
src/scope_guard.h \
119121
src/lcf_options.h \
120122
src/lcf_saveopt.h \
121123
src/ldb_reader.h \
122124
src/lmt_reader.h \
123125
src/lmu_reader.h \
124126
src/lsd_reader.h \
127+
src/encoder.h \
125128
src/reader_lcf.h \
126129
src/reader_struct.h \
127130
src/reader_util.h \

builds/vs2015/liblcf.vcxproj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@
183183
<ClCompile Include="..\..\src\generated\rpg_enums.cpp" />
184184
<ClCompile Include="..\..\src\reader_struct.cpp" />
185185
<ClCompile Include="..\..\src\data.cpp" />
186+
<ClCompile Include="..\..\src\encoder.cpp" />
186187
<ClCompile Include="..\..\src\ini.cpp" />
187188
<ClCompile Include="..\..\src\inireader.cpp" />
188189
<ClCompile Include="..\..\src\ldb_equipment.cpp" />
@@ -268,8 +269,10 @@
268269
<ItemGroup>
269270
<ClInclude Include="..\..\src\command_codes.h" />
270271
<ClInclude Include="..\..\src\data.h" />
272+
<ClInclude Include="..\..\src\encoder.h" />
271273
<ClInclude Include="..\..\src\ini.h" />
272274
<ClInclude Include="..\..\src\inireader.h" />
275+
<ClInclude Include="..\..\src\scope_guard.h" />
273276
<ClInclude Include="..\..\src\lcf_options.h" />
274277
<ClInclude Include="..\..\src\ldb_reader.h" />
275278
<ClInclude Include="..\..\src\lmt_reader.h" />

src/encoder.cpp

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
#include "encoder.h"
2+
#include "reader_util.h"
3+
#include "scope_guard.h"
4+
#include <exception>
5+
6+
#ifdef LCF_SUPPORT_ICU
7+
# include <unicode/ucsdet.h>
8+
# include <unicode/ucnv.h>
9+
#else
10+
# ifdef _MSC_VER
11+
# error MSVC builds require ICU
12+
# endif
13+
#endif
14+
15+
#ifdef _WIN32
16+
# define WIN32_LEAN_AND_MEAN
17+
# ifndef NOMINMAX
18+
# define NOMINMAX
19+
# endif
20+
# include <windows.h>
21+
#else
22+
# ifndef LCF_SUPPORT_ICU
23+
# include <iconv.h>
24+
# endif
25+
# include <locale>
26+
#endif
27+
28+
#if defined(__MORPHOS__) || defined(__amigaos4__)
29+
#define ICONV_CONST const
30+
#endif
31+
32+
/**
 * Normalizes an encoding name so that UTF-8 is represented by the empty string.
 *
 * When built with ICU, any name that ucnv_compareNames() considers equal to
 * "UTF-8" is replaced by an empty string. Without ICU the name is returned
 * unchanged.
 *
 * @param enc encoding name to inspect.
 * @return an empty string for UTF-8 (ICU builds only), otherwise enc.
 */
static std::string filterUtf8Compatible(std::string enc) {
#ifdef LCF_SUPPORT_ICU
	const bool is_utf8 = (ucnv_compareNames(enc.c_str(), "UTF-8") == 0);
	if (is_utf8) {
		return std::string();
	}
#endif
	return enc;
}
40+
41+
/**
 * Builds an encoder for the given storage encoding.
 *
 * The name is passed through filterUtf8Compatible() before being stored, and
 * Init() is then called to set up the converter state.
 *
 * @param encoding name (or code page number) of the storage encoding.
 */
Encoder::Encoder(std::string encoding) : _encoding(filterUtf8Compatible(std::move(encoding))) {
	Init();
}
46+
47+
/** Tears the encoder down by delegating to Reset(). */
Encoder::~Encoder()
{
	Reset();
}
50+
51+
void Encoder::Encode(std::string& str) {
52+
if (_encoding.empty() || str.empty()) {
53+
return;
54+
}
55+
Convert(str, _conv_runtime, _conv_storage);
56+
}
57+
58+
void Encoder::Decode(std::string& str) {
59+
if (_encoding.empty() || str.empty()) {
60+
return;
61+
}
62+
Convert(str, _conv_storage, _conv_runtime);
63+
}
64+
65+
void Encoder::Init() {
66+
if (_encoding.empty()) {
67+
return;
68+
}
69+
auto code_page = atoi(_encoding.c_str());
70+
const auto& storage_encoding = code_page > 0
71+
? ReaderUtil::CodepageToEncoding(code_page)
72+
: _encoding;
73+
74+
#ifdef LCF_SUPPORT_ICU
75+
auto status = U_ZERO_ERROR;
76+
constexpr auto runtime_encoding = "UTF-8";
77+
auto conv_runtime = ucnv_open(runtime_encoding, &status);
78+
79+
if (conv_runtime == nullptr) {
80+
fprintf(stderr, "liblcf: ucnv_open() error for encoding \"%s\": %s\n", runtime_encoding, u_errorName(status));
81+
throw std::runtime_error("ucnv_open() failed");
82+
}
83+
status = U_ZERO_ERROR;
84+
auto sg = makeScopeGuard([&]() { ucnv_close(conv_runtime); });
85+
86+
auto conv_storage = ucnv_open(storage_encoding.c_str(), &status);
87+
88+
if (conv_storage == nullptr) {
89+
fprintf(stderr, "liblcf: ucnv_open() error for dest encoding \"%s\": %s\n", storage_encoding.c_str(), u_errorName(status));
90+
throw std::runtime_error("ucnv_open() failed");
91+
}
92+
93+
sg.Dismiss();
94+
95+
_conv_runtime = conv_runtime;
96+
_conv_storage = conv_storage;
97+
#else
98+
_conv_runtime = const_cast<char*>("UTF-8");
99+
_conv_storage = const_cast<char*>(_encoding.c_str());
100+
#endif
101+
}
102+
103+
/**
 * Releases the converters held by this encoder.
 *
 * With ICU, each non-null converter is closed with ucnv_close() and the
 * member is then set to nullptr, so calling Reset() again does not close the
 * same converter twice. Without ICU nothing is done: the members only point
 * at encoding names that are not owned through them.
 */
void Encoder::Reset() {
#ifdef LCF_SUPPORT_ICU
	if (auto* conv = static_cast<UConverter*>(_conv_runtime)) {
		ucnv_close(conv);
	}
	_conv_runtime = nullptr;

	if (auto* conv = static_cast<UConverter*>(_conv_storage)) {
		ucnv_close(conv);
	}
	_conv_storage = nullptr;
#endif
}
111+
112+
113+
void Encoder::Convert(std::string& str, void* conv_dst_void, void* conv_src_void) {
114+
#ifdef LCF_SUPPORT_ICU
115+
const auto& src = str;
116+
auto* conv_dst = reinterpret_cast<UConverter*>(conv_dst_void);
117+
auto* conv_src = reinterpret_cast<UConverter*>(conv_src_void);
118+
119+
auto status = U_ZERO_ERROR;
120+
_buffer.resize(src.size() * 4);
121+
122+
const auto* src_p = src.c_str();
123+
auto* dst_p = _buffer.data();
124+
125+
ucnv_convertEx(conv_dst, conv_src,
126+
&dst_p, dst_p + _buffer.size(),
127+
&src_p, src_p + src.size(),
128+
nullptr, nullptr, nullptr, nullptr,
129+
true, true,
130+
&status);
131+
132+
if (U_FAILURE(status)) {
133+
fprintf(stderr, "liblcf: ucnv_convertEx() error when encoding \"%s\": %s\n", src.c_str(), u_errorName(status));
134+
_buffer.clear();
135+
}
136+
137+
str.assign(_buffer.data(), dst_p);
138+
return;
139+
#else
140+
auto* conv_dst = reinterpret_cast<const char*>(conv_dst_void);
141+
auto* conv_src = reinterpret_cast<const char*>(conv_src_void);
142+
iconv_t cd = iconv_open(conv_dst, conv_src);
143+
if (cd == (iconv_t)-1)
144+
return;
145+
char *src = &str.front();
146+
size_t src_left = str.size();
147+
size_t dst_size = str.size() * 5 + 10;
148+
_buffer.resize(dst_size);
149+
char *dst = _buffer.data();
150+
size_t dst_left = dst_size;
151+
# ifdef ICONV_CONST
152+
char ICONV_CONST *p = src;
153+
# else
154+
char *p = src;
155+
# endif
156+
char *q = dst;
157+
size_t status = iconv(cd, &p, &src_left, &q, &dst_left);
158+
iconv_close(cd);
159+
if (status == (size_t) -1 || src_left > 0) {
160+
str.clear();
161+
return;
162+
}
163+
*q++ = '\0';
164+
str.assign(dst, dst_size - dst_left);
165+
return;
166+
#endif
167+
}
168+
169+

src/encoder.h

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* This file is part of liblcf. Copyright (c) 2018 liblcf authors.
3+
* https://github.com/EasyRPG/liblcf - https://easyrpg.org
4+
*
5+
* liblcf is Free/Libre Open Source Software, released under the MIT License.
6+
* For the full copyright and license information, please view the COPYING
7+
* file that was distributed with this source code.
8+
*/
9+
10+
#ifndef LCF_ENCODER_H
11+
#define LCF_ENCODER_H
12+
#include <vector>
13+
#include <string>
14+
15+
/**
 * Converts strings in place between a storage encoding and the runtime
 * encoding. Instances are not copyable.
 */
class Encoder {
public:
	/**
	 * @param encoding name of the storage encoding.
	 */
	explicit Encoder(std::string encoding);
	~Encoder();

	Encoder(const Encoder&) = delete;
	Encoder& operator=(const Encoder&) = delete;

	/** Converts str in place; see the definition for the direction. */
	void Encode(std::string& str);

	/** Converts str in place in the opposite direction to Encode(). */
	void Decode(std::string& str);

	/** @return the encoding name stored by the constructor. */
	const std::string& GetEncoding() const;

private:
	void Init();
	void Reset();
	void Convert(std::string& str, void* conv_dst, void* conv_src);

	/** Opaque handle for the storage side of a conversion. */
	void* _conv_storage = nullptr;
	/** Opaque handle for the runtime side of a conversion. */
	void* _conv_runtime = nullptr;
	/** Scratch buffer; presumably reused across conversions. */
	std::vector<char> _buffer;
	/** Encoding name as stored by the constructor. */
	std::string _encoding;
};


inline const std::string& Encoder::GetEncoding() const
{
	return _encoding;
}
43+
44+
#endif

src/reader_lcf.cpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717

1818
std::string LcfReader::error_str;
1919

20-
LcfReader::LcfReader(std::istream& filestream, std::string encoding) :
21-
encoding(encoding),
22-
stream(filestream)
20+
LcfReader::LcfReader(std::istream& filestream, std::string encoding)
21+
: stream(filestream)
22+
, encoder(std::move(encoding))
2323
{
2424
}
2525

@@ -181,10 +181,9 @@ void LcfReader::Read<uint32_t>(std::vector<uint32_t> &buffer, size_t size) {
181181
}
182182

183183
void LcfReader::ReadString(std::string& ref, size_t size) {
184-
char* chars = new char[size];
185-
Read(chars, 1, size);
186-
ref = Encode(std::string(chars, size));
187-
delete[] chars;
184+
ref.resize(size);
185+
Read((size > 0 ? &ref.front(): nullptr), 1, size);
186+
Encode(ref);
188187
}
189188

190189
bool LcfReader::IsOk() const {
@@ -271,8 +270,8 @@ const std::string& LcfReader::GetError() {
271270
return error_str;
272271
}
273272

274-
std::string LcfReader::Encode(const std::string& str_to_encode) {
275-
return ReaderUtil::Recode(str_to_encode, encoding, "UTF-8");
273+
void LcfReader::Encode(std::string& str) {
274+
encoder.Encode(str);
276275
}
277276

278277
int LcfReader::IntSize(unsigned int x) {

src/reader_lcf.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <stdint.h>
2020
#include "lcf_options.h"
2121
#include "reader_util.h"
22+
#include "encoder.h"
2223

2324
/*
2425
* Calls SkipDebug() instead of Skip() for debug builds.
@@ -199,10 +200,9 @@ class LcfReader {
199200
* Encodes a string to UTF-8 using the set encoding
200201
* in the reader constructor.
201202
*
202-
* @param str_to_encode string to encode.
203-
* @return UTF-8 version of string.
203+
* @param str to convert from encoding to UTF-8
204204
*/
205-
std::string Encode(const std::string& str_to_encode);
205+
void Encode(std::string& str);
206206

207207
/**
208208
* Calculates the size of a compressed integer.
@@ -213,12 +213,12 @@ class LcfReader {
213213
static int IntSize(unsigned int x);
214214

215215
private:
216-
/** Name of the encoding. */
217-
std::string encoding;
218216
/** File-stream managed by this Reader. */
219217
std::istream& stream;
220218
/** Contains the last set error. */
221219
static std::string error_str;
220+
/** The internal Encoder */
221+
Encoder encoder;
222222

223223
/**
224224
* Converts a 16bit signed integer to/from little-endian.

0 commit comments

Comments
 (0)