@@ -823,3 +823,58 @@ TEST_CASE( "testPrintCompact" )
823
823
check_range (-1000000.0 ,10000000.0 );
824
824
check_range (-1.0E32 ,1.0E32 );
825
825
}// void testPrintCompact()
826
+
827
+
828
+ TEST_CASE ( " testValidUtf8" )
829
+ {
830
+ using namespace SpecUtils ;
831
+
832
+ // Valid UTF-8 strings
833
+ const char * valid_utf8_strings[] = {
834
+ " Hello, World!" , // Basic ASCII
835
+ " Привет" , // Cyrillic
836
+ " こんにちは" , // Japanese
837
+ " 😊" , // Emoji
838
+ " \xE2\x9C\x94 " , // Check mark (U+2714)
839
+ " \xF0\x9F\x98\x81 " , // Grinning face (U+1F600)
840
+ " \xE2\x82\xAC " , // Euro sign (U+20AC)
841
+ " \xF0\x9F\x8C\x90 " , // Earth globe (U+1F30D)
842
+ " \xF0\x9F\x92\xA9 " , // Money bag (U+1F4B0)
843
+ };
844
+
845
+ for (const auto & str : valid_utf8_strings) {
846
+ CHECK (valid_utf8 (str, std::strlen (str)));
847
+ }
848
+
849
+ // Invalid UTF-8 strings
850
+ const char * invalid_utf8_strings[] = {
851
+ " \x80 " , // Invalid start byte
852
+ " \xC3\x28 " , // Invalid continuation byte
853
+ " \xE2\x82\x28 " , // Invalid continuation byte
854
+ " \xF0\x28\x8C\x28 " , // Invalid continuation byte
855
+ " \xF0\x9F\x98\x28 " , // Invalid continuation byte
856
+ " \xC3\xA9\xC3\x28 " , // Mixed valid and invalid
857
+ " \xE2\x82\xAC\xE2\x28 " , // Mixed valid and invalid
858
+ " \xF0\x9F\x92\xA9\xF0\x28 " , // Mixed valid and invalid
859
+ };
860
+
861
+ for (const auto & str : invalid_utf8_strings) {
862
+ CHECK (!valid_utf8 (str, std::strlen (str)));
863
+ }
864
+
865
+
866
+ // Edge cases
867
+ CHECK (valid_utf8 (" " , 0 )); // Empty string
868
+ CHECK (valid_utf8 (" \xC2\xA9 " , 2 )); // Single valid UTF-8 character (©)
869
+ CHECK (!valid_utf8 (" \xC2 " , 1 )); // Incomplete multibyte sequence
870
+ CHECK (!valid_utf8 (" \xE2\x82 " , 2 )); // Incomplete multibyte sequence
871
+ CHECK (!valid_utf8 (" \xF0\x9F\x98 " , 3 )); // Incomplete multibyte sequence
872
+
873
+ // Large valid UTF-8 string
874
+ std::string large_valid_utf8 (10000 , ' a' ); // A large string of 'a's
875
+ CHECK (valid_utf8 (large_valid_utf8.c_str (), large_valid_utf8.size ()));
876
+
877
+ // Large invalid UTF-8 string
878
+ std::string large_invalid_utf8 (10000 , ' \x80 ' ); // A large string of invalid bytes
879
+ CHECK (!valid_utf8 (large_invalid_utf8.c_str (), large_invalid_utf8.size ()));
880
+ }// TEST_CASE( "testValidUtf8" )
0 commit comments