@@ -196,6 +196,10 @@ static const SYSTEMTIME time_t_epoch_base_systemtime = {
196
196
197
197
#define FILETIME_CHUNKS_PER_SECOND (10000000UL)
198
198
199
/* Mutex held by win32_read() around its call to win32_read_console().
 * win32_read_console() keeps not-yet-delivered converted bytes in
 * function-static storage, so concurrent callers must not run it at the
 * same time.  The mutex only exists when USE_ITHREADS is defined and is
 * set up with MUTEX_INIT() during Perl_win32_init(). */
+ #ifdef USE_ITHREADS
200
+ static perl_mutex win32_read_console_mutex ;
201
+ #endif
202
+
199
203
#ifdef SET_INVALID_PARAMETER_HANDLER
200
204
static BOOL silent_invalid_parameter_handler = FALSE;
201
205
@@ -3743,10 +3747,128 @@ win32_dup2(int fd1,int fd2)
3743
3747
return dup2 (fd1 ,fd2 );
3744
3748
}
3745
3749
3750
+ static int
3751
+ win32_read_console (int fd , U8 * buf , unsigned int cnt )
3752
+ {
3753
+ /* This function is a workaround for a bug in Windows:
3754
+ * https://github.com/microsoft/terminal/issues/4551
3755
+ * tl;dr: ReadFile() and ReadConsoleA() return garbage when reading
3756
+ * non-ASCII characters from the console with the 65001 codepage.
3757
+ */
3758
+ HANDLE h = (HANDLE )_get_osfhandle (fd );
3759
+ size_t left_to_read = cnt ;
3760
+ DWORD mode ;
3761
+
3762
+ if (h == INVALID_HANDLE_VALUE ) {
3763
+ errno = EBADF ;
3764
+ return -1 ;
3765
+ }
3766
+
3767
+ if (!GetConsoleMode (h , & mode )) {
3768
+ translate_to_errno ();
3769
+ return -1 ;
3770
+ }
3771
+
3772
+ while (left_to_read ) {
3773
+ /* The purpose of converted_buf is to preserve partial UTF-8 (or of any
3774
+ * other multibyte encoding) code points between read() calls. Since
3775
+ * there's only one console, the buffer is global. It's needed because
3776
+ * ReadConsoleW() returns a string of UTF-16 code units and its result,
3777
+ * after conversion to the current console codepage, may not fit in the
3778
+ * return buffer.
3779
+ *
3780
+ * The buffer's size is 8 because it will contain at most two UTF-8 code
3781
+ * points.
3782
+ */
3783
+ static char converted_buf [8 ];
3784
+ static size_t converted_buf_len = 0 ;
3785
+ WCHAR wbuf [2 ];
3786
+ DWORD wbuf_len = 0 , chars_read ;
3787
+
3788
+ if (converted_buf_len ) {
3789
+ bool newline = 0 ;
3790
+ size_t to_write = MIN (converted_buf_len , left_to_read );
3791
+
3792
+ /* Don't read anything if the *first* character is ^Z and
3793
+ * ENABLE_PROCESSED_INPUT is enabled. On some versions of Windows,
3794
+ * ReadFile() ignores ENABLE_PROCESSED_INPUT, but apparently it's a
3795
+ * bug: https://github.com/microsoft/terminal/issues/4958
3796
+ */
3797
+ if (left_to_read == cnt && (mode & ENABLE_PROCESSED_INPUT ) &&
3798
+ converted_buf [0 ] == 0x1a )
3799
+ break ;
3800
+
3801
+ /* Are we returning a newline? */
3802
+ if (memchr (converted_buf , '\n' , to_write ))
3803
+ newline = 1 ;
3804
+
3805
+ memcpy (buf , converted_buf , to_write );
3806
+ buf += to_write ;
3807
+
3808
+ /* If there's anything left in converted_buf, move it to the
3809
+ * beginning of the buffer. */
3810
+ converted_buf_len -= to_write ;
3811
+ if (converted_buf_len )
3812
+ memmove (
3813
+ converted_buf , converted_buf + to_write , converted_buf_len
3814
+ );
3815
+
3816
+ left_to_read -= to_write ;
3817
+
3818
+ /* With ENABLE_LINE_INPUT enabled, we stop reading after the first
3819
+ * newline, otherwise we stop reading after the first character. */
3820
+ if (!left_to_read || newline || (mode & ENABLE_LINE_INPUT ) == 0 )
3821
+ break ;
3822
+ }
3823
+
3824
+ /* Reading one code unit at a time is inefficient, but since this code
3825
+ * is used only for the interactive console, that shouldn't matter. */
3826
+ if (!ReadConsoleW (h , wbuf , 1 , & chars_read , 0 )) {
3827
+ translate_to_errno ();
3828
+ return -1 ;
3829
+ }
3830
+ if (!chars_read )
3831
+ break ;
3832
+
3833
+ ++ wbuf_len ;
3834
+
3835
+ if (wbuf [0 ] >= 0xD800 && wbuf [0 ] <= 0xDBFF ) {
3836
+ /* High surrogate, read one more code unit. */
3837
+ if (!ReadConsoleW (h , wbuf + 1 , 1 , & chars_read , 0 )) {
3838
+ translate_to_errno ();
3839
+ return -1 ;
3840
+ }
3841
+ if (chars_read )
3842
+ ++ wbuf_len ;
3843
+ }
3844
+
3845
+ converted_buf_len = WideCharToMultiByte (
3846
+ GetConsoleCP (), 0 , wbuf , wbuf_len , converted_buf ,
3847
+ sizeof (converted_buf ), NULL , NULL
3848
+ );
3849
+ if (!converted_buf_len ) {
3850
+ translate_to_errno ();
3851
+ return -1 ;
3852
+ }
3853
+ }
3854
+
3855
+ return cnt - left_to_read ;
3856
+ }
3857
+
3858
+
3746
3859
DllExport int
3747
3860
win32_read (int fd , void * buf , unsigned int cnt )
3748
3861
{
3749
- return read (fd , buf , cnt );
3862
+ int ret ;
3863
+ if (UNLIKELY (win32_isatty (fd ) && GetConsoleCP () == 65001 )) {
3864
+ MUTEX_LOCK (& win32_read_console_mutex );
3865
+ ret = win32_read_console (fd , buf , cnt );
3866
+ MUTEX_UNLOCK (& win32_read_console_mutex );
3867
+ }
3868
+ else
3869
+ ret = read (fd , buf , cnt );
3870
+
3871
+ return ret ;
3750
3872
}
3751
3873
3752
3874
DllExport int
@@ -4907,6 +5029,8 @@ Perl_win32_init(int *argcp, char ***argvp)
4907
5029
time_t_epoch_base_filetime .LowPart = ft .dwLowDateTime ;
4908
5030
time_t_epoch_base_filetime .HighPart = ft .dwHighDateTime ;
4909
5031
}
5032
+
5033
+ MUTEX_INIT (& win32_read_console_mutex );
4910
5034
}
4911
5035
4912
5036
void
0 commit comments