Skip to content

Commit 1bfd6cd

Browse files
committed
fixcompile
1 parent 1e76bb5 commit 1bfd6cd

File tree

3 files changed

+40
-37
lines changed

3 files changed

+40
-37
lines changed

CMakeLists.txt

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cmake_minimum_required(VERSION 3.12)
22

3-
set(CMAKE_CXX_STANDARD 23)
3+
set(CMAKE_CXX_STANDARD 20)
44

55
project(main)
66

@@ -11,11 +11,12 @@ else()
1111
endif()
1212

1313
find_package(fmt REQUIRED)
14+
find_package(Boost REQUIRED COMPONENTS locale)
1415

1516
file(GLOB sources CONFIGURE_DEPENDS "*.cpp")
1617
foreach(source IN LISTS sources)
1718
get_filename_component(name ${source} NAME_WE)
1819
add_executable(${name} ${source})
19-
target_link_libraries(${name} PRIVATE fmt)
20+
target_link_libraries(${name} PRIVATE fmt boost_locale)
2021
target_include_directories(${name} PRIVATE include)
2122
endforeach()

cppguidebook.typ

+36-33
Original file line numberDiff line numberDiff line change
@@ -2266,6 +2266,10 @@ void thisFuncAcceptsUTF16(UTF16String msg);
22662266

22672267
#detail[其中转换函数签名为 `std::string u8toansi(std::u8string s)`,很可惜,标准库并没有提供这个函数,直到 C++26 前,标准库对字符编码支持一直很差,你不得不自己实现或依赖第三方库。]
22682268

2269+
==== u8 字符串常量转换问题
2270+
2271+
TODO
2272+
22692273
== 选择你的阵营!
22702274

22712275
#image("pic/utfwar.png")
@@ -2599,16 +2603,14 @@ $ sudo pacman -S boost
25992603
using boost::locale::conv::utf_to_utf;
26002604
26012605
int main() {
2602-
std::u8string s8 = u8"你好";
2606+
std::string s8 = u8"你好";
26032607
// UTF-8 转 UTF-32:
26042608
std::u32string s32 = utf_to_utf<char32_t>(s8);
26052609
// UTF-8 转 UTF-16:
26062610
std::u16string s16 = utf_to_utf<char16_t>(s8);
26072611
// UTF-32 转 UTF-8:
2608-
s8 = utf_to_utf<char8_t>(s32);
2609-
// UTF-32 转 UTF-8 (但以 char 存储):
2610-
std::string s8c = utf_to_utf<char>(s32);
2611-
std::cout << s8c << '\n';
2612+
s8 = utf_to_utf<char>(s32);
2613+
std::cout << s8 << '\n';
26122614
return 0;
26132615
}
26142616
```
@@ -2617,10 +2619,12 @@ int main() {
26172619

26182620
比如从 `char32_t` 转到 `char16_t`,只需要 `utf_to_utf<char16_t>` 就可以(模板参数指定的是目标字符类型,来源类型会自动推导),非常方便。
26192621

2622+
#warn[`boost::locale` 有一个缺点 TODO]
2623+
26202624
编译:
26212625

26222626
```bash
2623-
$ g++ -std=c++20 -lboost_locale main.cpp
2627+
$ g++ -std=c++17 -lboost_locale main.cpp
26242628
```
26252629

26262630
输出:
@@ -2653,7 +2657,7 @@ int main() {
26532657
std::wstring ws = to_utf<wchar_t>(s, "GBK");
26542658
std::wcout << ws << '\n';
26552659
// 从 UTF-16 转回 GBK
2656-
s = from_utf<char>(ws, "GBK");
2660+
s = from_utf(ws, "GBK");
26572661
std::wcout << s << '\n';
26582662
return 0;
26592663
}
@@ -2665,6 +2669,8 @@ int main() {
26652669

26662670
`to_utf<char16_t>` 则是无论什么平台,都会转为 UTF-16。
26672671

2672+
`from_utf` 不需要指定任何模板参数,因为它总是返回 `std::string`(ANSI 或 GBK 编码的字符串),参数是什么编码,会根据参数的字符类型自动推导,例如 `from_utf(ws, "GBK")` 这里的参数是 `std::wstring`(字符类型为 `wchar_t`),那么在 Windows 上,它会检测到 `wchar_t` 是 2 字节,就认为是 UTF-16 到 GBK 的转换。
2673+
26682674
==== UTF 和 ANSI 互转
26692675

26702676
我们程序的用户不一定是中国用户(GBK),也可能是俄罗斯用户(CP1251)、日本用户(Shift-JIS)、西班牙用户(CP1252)等。
@@ -2681,11 +2687,11 @@ using boost::locale::conv::from_utf;
26812687
using boost::locale::conv::to_utf;
26822688
26832689
int main() {
2684-
std::u8string u8s = u8"你好";
2690+
std::string u8s = u8"你好";
26852691
// UTF-8 转 ANSI
2686-
std::string s = from_utf<char>(u8s, "");
2692+
std::string s = from_utf(u8s, "");
26872693
// ANSI 转 UTF-8
2688-
u8s = to_utf<char8_t>(s, "");
2694+
u8s = to_utf<char>(s, "");
26892695
return 0;
26902696
}
26912697
```
@@ -2728,16 +2734,14 @@ int main() {
27282734
inset: 3pt,
27292735
align: horizon,
27302736
[函数名称], [从], [到],
2731-
[`from_utf<char>("GBK", string)`], [UTF-8], [GBK],
2732-
[`from_utf<char>("GBK", u8string)`], [UTF-8], [GBK],
2733-
[`from_utf<char>("GBK", u16string)`], [UTF-16], [GBK],
2734-
[`from_utf<char>("GBK", u32string)`], [UTF-32], [GBK],
2735-
[`from_utf<char>("GBK", wstring)`], [Linux 上UTF-32 \ Win 上 UTF-16], [GBK],
2736-
[`from_utf<char>("", string)`], [UTF-8], [区域设置],
2737-
[`from_utf<char>("", u8string)`], [UTF-8], [区域设置],
2738-
[`from_utf<char>("", u16string)`], [UTF-16], [区域设置],
2739-
[`from_utf<char>("", u32string)`], [UTF-32], [区域设置],
2740-
[`from_utf<char>("", wstring)`], [Linux 上UTF-32 \ Win 上 UTF-16], [区域设置],
2737+
[`from_utf(string, "GBK")`], [UTF-8], [GBK],
2738+
[`from_utf(u16string, "GBK")`], [UTF-16], [GBK],
2739+
[`from_utf(u32string, "GBK")`], [UTF-32], [GBK],
2740+
[`from_utf(wstring, "GBK")`], [Linux 上UTF-32 \ Win 上 UTF-16], [GBK],
2741+
[`from_utf(string, "")`], [UTF-8], [区域设置],
2742+
[`from_utf(u16string, "")`], [UTF-16], [区域设置],
2743+
[`from_utf(u32string, "")`], [UTF-32], [区域设置],
2744+
[`from_utf(wstring, "")`], [Linux 上UTF-32 \ Win 上 UTF-16], [区域设置],
27412745
)
27422746

27432747
==== 指定处理错误的方法
@@ -2753,9 +2757,9 @@ int main() {
27532757
using boost::locale::conv::from_utf;
27542758
27552759
int main() {
2756-
std::u8string utf8 = u8"我爱𰻞𰻞面";
2760+
std::string utf8 = u8"我爱𰻞𰻞面";
27572761
// UTF-8 转 GBK
2758-
std::string gbk = from_utf<char>(utf8, "GBK");
2762+
std::string gbk = from_utf(utf8, "GBK");
27592763
// 错误,“𰻞”无法用 GBK 表示!
27602764
std::cout << gbk << '\n';
27612765
// 在 Windows 的 GBK 终端上,只显示“我爱面”
@@ -2777,10 +2781,10 @@ using boost::locale::conv::from_utf;
27772781
using boost::locale::conv::method_type;
27782782
27792783
int main() {
2780-
std::u8string utf8 = u8"我爱𰻞𰻞面";
2784+
std::string utf8 = u8"我爱𰻞𰻞面";
27812785
// UTF-8 转 GBK
2782-
std::string gbk = from_utf<char>(utf8, "GBK",
2783-
method_type::stop);
2786+
std::string gbk = from_utf(utf8, "GBK",
2787+
method_type::stop);
27842788
// 错误,“𰻞”无法用 GBK 表示!
27852789
// from_utf 会抛出 `conversion_error` 异常
27862790
std::cout << gbk << '\n';
@@ -2802,7 +2806,7 @@ void try_save(std::u32string content, std::wstring path) {
28022806
std::string binary;
28032807
try {
28042808
// 尝试将 UTF-32 转成 GBK 编码
2805-
binary = from_utf<char>(content, "GBK",
2809+
binary = from_utf(content, "GBK",
28062810
method_type::stop);
28072811
} catch (conversion_error const &e) { // 若 GBK 无法表示
28082812
// 改用前面带有 BOM 的 UTF-8 编码
@@ -2821,8 +2825,8 @@ void try_save(std::u32string content, std::wstring path) {
28212825
using boost::locale::conv::from_utf;
28222826
using boost::locale::conv::utf_to_utf;
28232827
2824-
void print(std::u8string msg) {
2825-
std::cout << from_utf<char>(msg, "");
2828+
void u8print(std::string msg) {
2829+
std::cout << from_utf(msg, "");
28262830
// 或者:
28272831
// std::wcout << utf_to_utf<wchar_t>(msg);
28282832
}
@@ -3061,14 +3065,13 @@ https://en.cppreference.com/w/cpp/string/byte/isspace
30613065
对于 Linux 用户,也可以检测如果是 Linux 系统,则什么转换都不做,因为 Linux 用户几乎都是 UTF-8,那么 `const char8_t *` 可以强转为 `const char *` 而不用任何额外开销。
30623066

30633067
```cpp
3064-
std::string to_os_string(std::u8string const &u8s) {
3068+
std::string to_os_string(std::string const &u8s) {
30653069
#if _WIN32
30663070
// UTF-8 到 ANSI
3067-
return boost::locale::conv::from_utf<char>(u8s, "");
3071+
return boost::locale::conv::from_utf(u8s, "");
30683072
#elif __linux__
3069-
return std::string(
3070-
reinterpret_cast<const char *>(u8s.c_str()),
3071-
u8s.size());
3073+
// 不转换
3074+
return u8s;
30723075
#else
30733076
#error "Unsupported system."
30743077
#endif

e1.cpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#include <fmt/format.h>
22
#include <boost/locale.hpp>
3-
#include <iostream>
43

54
using boost::locale::conv::utf_to_utf;
65
using boost::locale::conv::from_utf;
@@ -13,6 +12,6 @@ int main() {
1312
std::u16string s16 = utf_to_utf<char16_t>(s);
1413
// UTF-32 转 UTF-8:
1514
s = utf_to_utf<char8_t>(s32);
16-
fmt::println("{}", from_utf<char>(s));
15+
fmt::println("{}", from_utf(s, ""));
1716
return 0;
1817
}

0 commit comments

Comments
 (0)