Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: change all narrow filesystem paths and operations to speak UTF-8 #67

Merged
merged 17 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@
[submodule "dep/glm"]
path = dep/glm
url = https://github.com/g-truc/glm.git
[submodule "libs/luautf8"]
path = libs/luautf8
url = https://github.com/starwing/luautf8.git
20 changes: 20 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,26 @@ target_link_libraries(lcurl
install(TARGETS lcurl RUNTIME DESTINATION ".")
install(FILES $<TARGET_RUNTIME_DLLS:lcurl> DESTINATION ".")

# luautf8 module
#
# Builds the luautf8 submodule (libs/luautf8) as a shared library named
# lua-utf8, linked against LuaJIT, and installs it together with its runtime
# DLL dependencies into the root of the install tree.

add_library(lua-utf8 SHARED libs/luautf8/lutf8lib.c)

# LUA_BUILD_AS_DLL is defined only while compiling this target.
target_compile_definitions(lua-utf8
PRIVATE
LUA_BUILD_AS_DLL
)

# No extra include directories are listed here; this call is currently a
# placeholder.
target_include_directories(lua-utf8
PRIVATE
)

target_link_libraries(lua-utf8
PRIVATE
LuaJIT::LuaJIT
)

# Install the module itself, then every runtime DLL the target depends on.
install(TARGETS lua-utf8 RUNTIME DESTINATION ".")
install(FILES $<TARGET_RUNTIME_DLLS:lua-utf8> DESTINATION ".")

# lzip module

Expand Down
29 changes: 27 additions & 2 deletions engine/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@
#include "common/memtrak3.h"
#endif

#include <string>
#include <string_view>
#include <vector>

// =======
// Classes
// =======
Expand Down Expand Up @@ -475,15 +479,36 @@ T clamp(T &v, T l, T u)
// Common Functions
// ================

// Colour escape helpers.
// An escape is '^' followed by a decimal digit (length 2), or '^x' / '^X'
// followed by six hexadecimal digits (length 8).
// IsColorEscape returns that length, or 0 when str does not start with an
// escape.
// ReadColorEscape writes the colour selected by a leading escape into out and
// leaves out untouched when there is no escape. The std::u32string_view
// overload also returns the input with the escape removed from the front.
int IsColorEscape(const char* str);
void ReadColorEscape(const char* str, col3_t out);
int IsColorEscape(char const* str);
int IsColorEscape(std::u32string_view str);
void ReadColorEscape(char const* str, col3_t out);
std::u32string_view ReadColorEscape(std::u32string_view str, col3_t out);

char* _AllocString(const char* str, const char* file, int line);
#define AllocString(s) _AllocString(s, __FILE__, __LINE__)
char* _AllocStringLen(size_t len, const char* file, int line);
#define AllocStringLen(s) _AllocStringLen(s, __FILE__, __LINE__)
void FreeString(const char* str);
dword StringHash(const char* str, int mask);
dword StringHash(std::string_view str, int mask);

// Result of IndexUTF8ToUTF32: decoded UTF-32 text plus a table of byte offsets
// into the UTF-8 source string.
struct IndexedUTF32String {
// Decoded code points.
std::u32string text;
// Byte offset in the UTF-8 source at which each decoding step started;
// intended to hold one entry per element of text.
std::vector<size_t> sourceCodeUnitOffsets;
};

IndexedUTF32String IndexUTF8ToUTF32(std::string_view str);

#ifdef _WIN32
// Windows-only string conversion helpers.
// Widen*String convert a NUL-terminated narrow string in the ANSI, OEM or
// UTF-8 code page to a wide string allocated with new[]; release the result
// with FreeWideString. They return nullptr for a null input or when the
// conversion fails.
wchar_t* WidenANSIString(const char* str);
wchar_t* WidenOEMString(const char* str);
wchar_t* WidenUTF8String(const char* str);
void FreeWideString(wchar_t* str);

// Narrow*String perform the reverse conversion into a buffer allocated with
// new char[], returning nullptr for a null input or on failure.
// NOTE(review): no matching free function for the narrow results is declared
// here - confirm how callers are expected to release them.
char* NarrowANSIString(const wchar_t* str);
char* NarrowOEMString(const wchar_t* str);
char* NarrowUTF8String(const wchar_t* str);
#endif

#ifndef _WIN32
#define _stricmp strcasecmp
Expand Down
222 changes: 219 additions & 3 deletions engine/common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,10 @@ int IsColorEscape(const char* str)
}
if (isdigit(str[1])) {
return 2;
} else if (str[1] == 'x' || str[1] == 'X') {
}
else if (str[1] == 'x' || str[1] == 'X') {
for (int c = 0; c < 6; c++) {
if ( !isxdigit(str[c + 2]) ) {
if (!isxdigit(str[c + 2])) {
return 0;
}
}
Expand All @@ -216,23 +217,77 @@ int IsColorEscape(const char* str)
return 0;
}

// Reports whether str begins with a colour escape and, if so, how long it is.
// Returns 2 for an indexed escape such as ^7, 8 for a direct escape such as
// ^x123ABC, and 0 when str does not start with a recognised escape.
int IsColorEscape(std::u32string_view str)
{
    // Every escape is a caret followed by at least one more character.
    if (str.size() < 2 || str[0] != '^') {
        return 0;
    }

    char32_t const kind = str[1];

    // Indexed form. Only ASCII digits count, so isdigit is deliberately not
    // used here.
    bool const isIndexed = kind >= U'0' && kind <= U'9';
    if (isIndexed) {
        return 2;
    }

    // Direct form: 'x' or 'X' and room for six hexadecimal digits.
    bool const isDirect = (kind == 'x' || kind == 'X') && str.size() >= 8;
    if (!isDirect) {
        return 0;
    }

    for (char32_t digit : str.substr(2, 6)) {
        bool const isDecimal = digit >= U'0' && digit <= U'9';
        bool const isUpperHex = digit >= U'A' && digit <= U'F';
        bool const isLowerHex = digit >= U'a' && digit <= U'f';
        if (!(isDecimal || isUpperHex || isLowerHex)) {
            return 0;
        }
    }
    return 8;
}

// Decodes the colour escape at the start of str into out.
// An indexed escape (^N) copies entry N of the colorEscape table; a direct
// escape (^xRRGGBB) converts each two-digit hex channel to a value in 0..1.
// out is left untouched when str does not start with an escape.
void ReadColorEscape(const char* str, col3_t out)
{
    int const escapeLen = IsColorEscape(str);
    if (escapeLen == 2) {
        VectorCopy(colorEscape[str[1] - '0'], out);
    }
    else if (escapeLen == 8) {
        int red, green, blue;
        sscanf(str + 2, "%2x%2x%2x", &red, &green, &blue);
        out[0] = red / 255.0f;
        out[1] = green / 255.0f;
        out[2] = blue / 255.0f;
    }
}

// Decodes the colour escape at the start of str into out and returns the
// remainder of str after the escape.
// An indexed escape (^N) copies entry N of the colorEscape table; a direct
// escape (^xRRGGBB) converts each two-digit hex channel to a value in 0..1.
// When str does not start with an escape, out is left untouched and str is
// returned unchanged.
std::u32string_view ReadColorEscape(std::u32string_view str, col3_t out)
{
    int len = IsColorEscape(str);
    switch (len) {
    case 2:
        VectorCopy(colorEscape[str[1] - U'0'], out);
        break;
    case 8:
        {
            // IsColorEscape has already verified that the six characters after
            // "^x" are ASCII hex digits, so narrowing them to char is lossless.
            char buf[7]{};
            for (size_t i = 0; i < 6; ++i) {
                buf[i] = (char)str[i + 2];
            }
            // %x stores into unsigned int, so the channels are declared as such.
            unsigned int xr = 0, xg = 0, xb = 0;
            sscanf(buf, "%2x%2x%2x", &xr, &xg, &xb);
            out[0] = xr / 255.0f;
            out[1] = xg / 255.0f;
            out[2] = xb / 255.0f;
        }
        break;
    }
    return str.substr(len);
}

// ================
Expand Down Expand Up @@ -279,3 +334,164 @@ dword StringHash(const char* str, int mask)
}
return hash & mask;
}

// Hashes str by summing, for each character, the character scaled by 4999
// XORed with its position-derived term ((index + 17) * 2003), then masks the
// sum with mask.
dword StringHash(std::string_view str, int mask)
{
    dword accum = 0;
    size_t pos = 0;
    for (char ch : str) {
        accum += (ch * 4999) ^ (((dword)pos + 17) * 2003);
        ++pos;
    }
    return accum & mask;
}

#ifdef _WIN32
#include <Windows.h>

static wchar_t* WidenCodepageString(const char* str, UINT codepage)
{
if (!str) {
return nullptr;
}
// Early-out if empty, avoids ambigious error return from MBTWC.
if (!*str) {
wchar_t* wstr = new wchar_t[1];
*wstr = L'\0';
return wstr;
}
DWORD cb = (DWORD)strlen(str);
int cch = MultiByteToWideChar(codepage, MB_ERR_INVALID_CHARS, str, cb, nullptr, 0);
if (cch == 0) {
// Invalid string or other error.
return nullptr;
}
wchar_t* wstr = new wchar_t[cch + 1]; // sized MBTWC doesn't include terminator.
MultiByteToWideChar(codepage, 0, str, cb, wstr, cch);
wstr[cch] = '\0';
return wstr;
}

// Widens a NUL-terminated string encoded in the CP_ACP code page.
// The result is allocated with new[] by WidenCodepageString (release it with
// FreeWideString); nullptr is returned for a null input or a failed conversion.
wchar_t* WidenANSIString(const char* str)
{
return WidenCodepageString(str, CP_ACP);
}

// Widens a NUL-terminated string encoded in the CP_OEMCP code page.
// The result is allocated with new[] by WidenCodepageString (release it with
// FreeWideString); nullptr is returned for a null input or a failed conversion.
wchar_t* WidenOEMString(const char* str)
{
return WidenCodepageString(str, CP_OEMCP);
}

// Widens a NUL-terminated UTF-8 (CP_UTF8) string.
// The result is allocated with new[] by WidenCodepageString (release it with
// FreeWideString); nullptr is returned for a null input or a failed conversion.
wchar_t* WidenUTF8String(const char* str)
{
return WidenCodepageString(str, CP_UTF8);
}

char* NarrowCodepageString(const wchar_t* str, UINT codepage)
{
if (!str) {
return nullptr;
}
if (!*str) {
char* nstr = new char[1];
*nstr = '\0';
return nstr;
}
DWORD cch = (DWORD)wcslen(str);
int cb = WideCharToMultiByte(codepage, 0, str, cch, nullptr, 0, nullptr, nullptr);
if (cb == 0) {
// Invalid string or other error.
return nullptr;
}
char* nstr = new char[cb + 1];
WideCharToMultiByte(codepage, 0, str, cch, nstr, cb, nullptr, nullptr);
nstr[cb] = '\0';
return nstr;
}

// Releases a wide string that was allocated with new[]. Passing nullptr is
// allowed and does nothing.
void FreeWideString(wchar_t* str)
{
    // delete[] on a null pointer is a no-op, so no explicit check is needed.
    delete[] str;
}

// Narrows a NUL-terminated wide string to the CP_ACP code page.
// The result is allocated with new char[] by NarrowCodepageString; nullptr is
// returned for a null input or a failed conversion.
char* NarrowANSIString(const wchar_t* str)
{
return NarrowCodepageString(str, CP_ACP);
}

// Narrows a NUL-terminated wide string to the CP_OEMCP code page.
// The result is allocated with new char[] by NarrowCodepageString; nullptr is
// returned for a null input or a failed conversion.
char* NarrowOEMString(const wchar_t* str)
{
return NarrowCodepageString(str, CP_OEMCP);
}

// Narrows a NUL-terminated wide string to UTF-8 (CP_UTF8).
// The result is allocated with new char[] by NarrowCodepageString; nullptr is
// returned for a null input or a failed conversion.
char* NarrowUTF8String(const wchar_t* str)
{
return NarrowCodepageString(str, CP_UTF8);
}

// Decodes UTF-8 input into UTF-32 and records, for every produced code point,
// the byte offset in input at which its encoding starts.
//
// On return, text and sourceCodeUnitOffsets always have the same length:
// text[i] was decoded from the bytes beginning at sourceCodeUnitOffsets[i].
// A byte that does not start a structurally valid sequence (a stray
// continuation byte, a truncated sequence, or an invalid lead byte) yields a
// single U+FFFD and decoding resumes at the next byte.
//
// NOTE(review): only the lead/continuation bit patterns are checked; overlong
// forms, surrogates and values above U+10FFFF are passed through as decoded.
// NOTE(review): this definition sits inside the #ifdef _WIN32 region of this
// file although it uses no Windows API - confirm that is intended.
IndexedUTF32String IndexUTF8ToUTF32(std::string_view input)
{
    IndexedUTF32String ret{};

    size_t const byteCount = input.size();
    auto& offsets = ret.sourceCodeUnitOffsets;
    auto& text = ret.text;

    // Every code point consumes at least one byte, so byteCount is an upper
    // bound on the number of entries in both outputs.
    offsets.reserve(byteCount);
    text.reserve(byteCount);

    auto const* bytes = reinterpret_cast<uint8_t const*>(input.data());
    auto const isContinuation = [](uint8_t byte) { return byte >> 6 == 0b10; }; // 10xx'xxxx

    for (size_t byteIdx = 0; byteIdx < byteCount;) {
        uint8_t const* b = bytes + byteIdx;
        size_t const left = byteCount - byteIdx;

        // Start from the "invalid byte" outcome; a successful decode below
        // overwrites both values.
        char32_t codepoint = 0xFFFDu;
        size_t consumed = 1;

        if (b[0] >> 7 == 0b0) { // 0xxx'xxxx
            codepoint = b[0];
        }
        else if (left >= 2 &&
            b[0] >> 5 == 0b110 &&
            isContinuation(b[1]))
        {
            auto const p0 = (uint32_t)b[0] & 0b1'1111;
            auto const p1 = (uint32_t)b[1] & 0b11'1111;
            codepoint = p0 << 6 | p1;
            consumed = 2;
        }
        else if (left >= 3 &&
            b[0] >> 4 == 0b1110 &&
            isContinuation(b[1]) &&
            isContinuation(b[2]))
        {
            auto const p0 = (uint32_t)b[0] & 0b1111;
            auto const p1 = (uint32_t)b[1] & 0b11'1111;
            auto const p2 = (uint32_t)b[2] & 0b11'1111;
            codepoint = p0 << 12 | p1 << 6 | p2;
            consumed = 3;
        }
        else if (left >= 4 &&
            b[0] >> 3 == 0b11110 &&
            isContinuation(b[1]) &&
            isContinuation(b[2]) &&
            isContinuation(b[3]))
        {
            auto const p0 = (uint32_t)b[0] & 0b111;
            auto const p1 = (uint32_t)b[1] & 0b11'1111;
            auto const p2 = (uint32_t)b[2] & 0b11'1111;
            auto const p3 = (uint32_t)b[3] & 0b11'1111; // the low six bits come from the fourth byte
            codepoint = p0 << 18 | p1 << 12 | p2 << 6 | p3;
            consumed = 4;
        }

        // Exactly one offset and one code point per iteration keeps the two
        // outputs index-aligned, including for replaced invalid bytes.
        offsets.push_back(byteIdx);
        text.push_back(codepoint);
        byteIdx += consumed;
    }

    return ret;
}

#endif
16 changes: 12 additions & 4 deletions engine/common/streams.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,10 +255,14 @@ bool fileInputStream_c::Read(void* out, size_t len)
return fread(out, len, 1, file) < 1;
}

bool fileInputStream_c::FileOpen(const char* fileName, bool binary)
bool fileInputStream_c::FileOpen(std::filesystem::path const& fileName, bool binary)
{
FileClose();
file = fopen(fileName, binary? "rb" : "r");
#ifdef _WIN32
file = _wfopen(fileName.c_str(), binary ? L"rb" : L"r");
#else
file = fopen(fileName.c_str(), binary ? "rb" : "r");
#endif
if ( !file ) {
return true;
}
Expand All @@ -277,10 +281,14 @@ bool fileOutputStream_c::Write(const void* in, size_t len)
return fwrite(in, len, 1, file) < 1;
}

bool fileOutputStream_c::FileOpen(const char* fileName, bool binary)
bool fileOutputStream_c::FileOpen(std::filesystem::path const& fileName, bool binary)
{
FileClose();
file = fopen(fileName, binary? "wb" : "w");
#ifdef _WIN32
file = _wfopen(fileName.c_str(), binary ? L"wb" : L"w");
#else
file = fopen(fileName.c_str(), binary ? "wb" : "w");
#endif
if ( !file ) {
return true;
}
Expand Down
Loading