From 64cf30a54589f8dd9010097e02caa4d1bf8ceca0 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Tue, 20 Feb 2024 07:53:00 -0700 Subject: [PATCH] adjustment and workaround for macOS iconv (#805) * work around issues with macOS iconv library * add arm64osx to CI Closes #797 --- .github/workflows/ci.yml | 4 ++++ c/prim5.c | 47 +++++++++++++++++++++++++++++++++++++++- c/version.h | 2 ++ mats/io.ms | 8 +++++-- 4 files changed, 58 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2892b53b7..4a3a627e2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,6 +15,10 @@ jobs: os: macos-12 - machine: ta6osx os: macos-12 + - machine: arm64osx + os: macos-14 + - machine: tarm64osx + os: macos-14 - machine: i3le os: ubuntu-22.04 - machine: ti3le diff --git a/c/prim5.c b/c/prim5.c index 90b087f12..78eaf0cf9 100644 --- a/c/prim5.c +++ b/c/prim5.c @@ -2273,6 +2273,50 @@ static void s_iconv_close(uptr cd) { ICONV_CLOSE((iconv_t)cd); } +#ifdef DISTRUST_ICONV_PROGRESS +# define ICONV_FROM iconv_fixup +static size_t iconv_fixup(iconv_t cd, char **src, size_t *srcleft, char **dst, size_t *dstleft) { + size_t r; + char *orig_src = *src, *orig_dst = *dst; + size_t orig_srcleft = *srcleft, orig_dstleft = *dstleft, srcuntried = 0; + + while (1) { + r = iconv((iconv_t)cd, src, srcleft, dst, dstleft); + if ((r == (size_t)-1) + && (errno == E2BIG) + && ((*srcleft < orig_srcleft) || (*dstleft < orig_dstleft))) { + /* Avoid a macOS (as of 14.2.1 and 14.3.1) iconv bug in this + case, where we don't trust that consumed input characters are + reflected in the output pointer. Reverting progress should be + ok for a correct iconv, too, since a -1 result means that no + irreversible progress was made. */ + *src = orig_src; + *dst = orig_dst; + *srcleft = orig_srcleft; + *dstleft = orig_dstleft; + + /* We need to make progress, if possible, to satisfy normal iconv + behavior and "io.ss" expectations. 
Try converting fewer + characters. */ + if (orig_srcleft > sizeof(string_char)) { + size_t try_chars = (orig_srcleft / sizeof(string_char)) / 2; + srcuntried += orig_srcleft - (try_chars * sizeof(string_char)); + orig_srcleft = try_chars * sizeof(string_char); + *srcleft = orig_srcleft; + } else + break; + } else + break; + } + + *srcleft += srcuntried; + + return r; +} +#else +# define ICONV_FROM ICONV +#endif + #define ICONV_BUFSIZ 400 static ptr s_iconv_from_string(uptr cd, ptr in, uptr i, uptr iend, ptr out, uptr o, uptr oend) { @@ -2298,7 +2342,8 @@ static ptr s_iconv_from_string(uptr cd, ptr in, uptr i, uptr iend, ptr out, uptr under Windows, the iconv dll might have been linked against a different C runtime and might therefore set a different errno */ errno = 0; - ICONV((iconv_t)cd, (ICONV_INBUF_TYPE)&inbuf, &inbytesleft, &outbuf, &outbytesleft); + ICONV_FROM((iconv_t)cd, (ICONV_INBUF_TYPE)&inbuf, &inbytesleft, &outbuf, &outbytesleft); + new_i = i + inmax - inbytesleft / sizeof(string_char); new_o = oend - outbytesleft; if (new_i != i || new_o != o) return Scons(Sinteger(new_i), Sinteger(new_o)); diff --git a/c/version.h b/c/version.h index e62922065..6eed74e1f 100644 --- a/c/version.h +++ b/c/version.h @@ -328,6 +328,8 @@ typedef int tputsputcchar; #define NSECCTIME(sb) (sb).st_ctimespec.tv_nsec #define NSECMTIME(sb) (sb).st_mtimespec.tv_nsec #define ICONV_INBUF_TYPE char ** +/* workaround issue in macOS 14.2.1 iconv: */ +#define DISTRUST_ICONV_PROGRESS #endif #if defined(__QNX__) diff --git a/mats/io.ms b/mats/io.ms index 98f68719c..06087b506 100644 --- a/mats/io.ms +++ b/mats/io.ms @@ -1012,7 +1012,9 @@ '() (if (fx= i #xD800) (f #xE000) - (cons i (f (fx+ i 1))))))) + (if (fx= i #xFEFF) ; avoid BOM, which an encoder is arguably justified in dropping + (f (fx+ i 1)) + (cons i (f (fx+ i 1)))))))) (define ls2 (let f ([n 1000000]) (if (fx= n 0) @@ -1021,7 +1023,9 @@ (let ([n (random (- #x110000 (- #xE000 #xD800)))]) (if (<= #xD800 n #xDFFF) (+ n (- #xE000 
#xD800)) - n)) + (if (fx= n #xFEFF) ; avoid BOM + #xFEFE + n))) (f (fx- n 1)))))) (define s (apply string (map integer->char (append ls1 ls2)))) #;(define s (apply string (map integer->char ls1)))