Skip to content

Commit 7fcaab7

Browse files
committed
MDEV-20912 Add support for utf8mb4_0900_* collations in MariaDB Server
This is done by mapping most of the existing MySQL unicode 0900 collations to MariaDB 1400 unicode collations. The assumption is that 1400 is a superset of 0900 for all practical purposes. I also added a new function 'compare_collations()' and changed most code to use this instead of comparing character sets directly. This enables one to seamlessly mix-and-match the corresponding 0900 and 1400 sets. Field comparison and ALTER TABLE treat the character sets as identical. All MySQL 8.0 0900 collations are supported except: - utf8mb4_ja_0900_as_cs - utf8mb4_ja_0900_as_cs_ks - utf8mb4_ru_0900_as_cs - utf8mb4_zh_0900_as_cs These do not have corresponding entries in the MariaDB 1400 collations. Other things: - Added COMMENT column to information_schema.collations. For utf8mb4_0900 collations it contains the corresponding alias collation.
1 parent 9e7762e commit 7fcaab7

21 files changed

+6284
-102
lines changed

include/m_ctype.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -458,9 +458,9 @@ typedef struct my_charset_loader_st
458458
{
459459
char error[128];
460460
void *(*once_alloc)(size_t);
461-
void *(*malloc)(size_t);
462-
void *(*realloc)(void *, size_t);
463-
void (*free)(void *);
461+
void *(*malloc)(size_t); /* Not used */
462+
void *(*realloc)(void *, size_t); /* Not used */
463+
void (*free)(void *); /* Not used */
464464
void (*reporter)(enum loglevel, const char *format, ...);
465465
int (*add_collation)(struct charset_info_st *cs);
466466
} MY_CHARSET_LOADER;
@@ -1693,6 +1693,7 @@ my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len);
16931693
uint my_ci_get_id_generic(CHARSET_INFO *cs, my_collation_id_type_t type);
16941694
LEX_CSTRING my_ci_get_collation_name_generic(CHARSET_INFO *cs,
16951695
my_collation_name_mode_t mode);
1696+
my_bool compare_collations(CHARSET_INFO *cs1, CHARSET_INFO *cs2);
16961697

16971698
typedef struct
16981699
{

include/my_sys.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,6 +1119,9 @@ static inline my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2)
11191119
extern my_bool init_compiled_charsets(myf flags);
11201120
extern void add_compiled_collation(struct charset_info_st *cs);
11211121
extern void add_compiled_extra_collation(struct charset_info_st *cs);
1122+
extern my_bool add_alias_for_collation(LEX_CSTRING *collation_name,
1123+
LEX_CSTRING *alias,
1124+
uint alias_id);
11221125
extern size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
11231126
char *to, size_t to_length,
11241127
const char *from, size_t length,

libmariadb

mysql-test/main/ctype_ldml.result

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -456,43 +456,43 @@ select "foo" = "foo " collate latin1_test;
456456
1
457457
The following tests check that two-byte collation IDs work
458458
select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
459-
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
460-
ascii2_general_nopad_ci ascii2 318 1
461-
ascii2_bin2 ascii2 319 1
462-
ascii2_general_ci ascii2 320 Yes 1
463-
ascii2_bin ascii2 321 1
464-
ascii2_general_inherited_ci ascii2 322 1
465-
ascii2_general_inherited2_ci ascii2 323 1
466-
ascii2_badly_inherited_ci ascii2 324 1
467-
ascii2_nopad_bin ascii2 325 1
468-
utf8mb4_test_ci utf8mb4 326 8
469-
utf16_test_ci utf16 327 8
470-
utf8mb4_test_400_ci utf8mb4 328 8
471-
utf8mb4_test_520_nopad_ci utf8mb4 329 8
472-
utf8mb4_uca1400_test01_as_ci utf8mb4 330 4
473-
latin1_test latin1 331 1
474-
latin1_test2 latin1 332 1
475-
latin1_test2_cs latin1 333 1
476-
latin1_swedish_nopad2_ci latin1 334 1
477-
utf8mb3_bengali_standard_ci utf8mb3 336 8
478-
utf8mb3_bengali_traditional_ci utf8mb3 337 8
479-
utf8mb3_implicit_weights_ci utf8mb3 338 8
480-
utf8mb3_phone_ci utf8mb3 352 8
481-
utf8mb3_test_ci utf8mb3 353 8
482-
utf8mb3_5624_1 utf8mb3 354 8
483-
utf8mb3_5624_2 utf8mb3 355 8
484-
utf8mb3_5624_3 utf8mb3 356 8
485-
utf8mb3_5624_4 utf8mb3 357 8
486-
ucs2_test_ci ucs2 358 8
487-
ucs2_vn_ci ucs2 359 8
488-
ucs2_5624_1 ucs2 360 8
489-
utf8mb3_5624_5 utf8mb3 368 8
490-
utf8mb3_5624_5_bad utf8mb3 369 8
491-
utf8mb3_czech_test_w2 utf8mb3 370 4
492-
utf8mb3_czech_test_nopad_w2 utf8mb3 371 4
493-
utf8mb3_czech_test_bad_w2 utf8mb3 372 4
494-
utf32_test_ci utf32 391 8
495-
utf8mb3_maxuserid_ci utf8mb3 2047 8
459+
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN COMMENT
460+
ascii2_general_nopad_ci ascii2 318 1
461+
ascii2_bin2 ascii2 319 1
462+
ascii2_general_ci ascii2 320 Yes 1
463+
ascii2_bin ascii2 321 1
464+
ascii2_general_inherited_ci ascii2 322 1
465+
ascii2_general_inherited2_ci ascii2 323 1
466+
ascii2_badly_inherited_ci ascii2 324 1
467+
ascii2_nopad_bin ascii2 325 1
468+
utf8mb4_test_ci utf8mb4 326 8
469+
utf16_test_ci utf16 327 8
470+
utf8mb4_test_400_ci utf8mb4 328 8
471+
utf8mb4_test_520_nopad_ci utf8mb4 329 8
472+
utf8mb4_uca1400_test01_as_ci utf8mb4 330 4
473+
latin1_test latin1 331 1 cp1252 West European
474+
latin1_test2 latin1 332 1 cp1252 West European
475+
latin1_test2_cs latin1 333 1 cp1252 West European
476+
latin1_swedish_nopad2_ci latin1 334 1 cp1252 West European
477+
utf8mb3_bengali_standard_ci utf8mb3 336 8
478+
utf8mb3_bengali_traditional_ci utf8mb3 337 8
479+
utf8mb3_implicit_weights_ci utf8mb3 338 8
480+
utf8mb3_phone_ci utf8mb3 352 8
481+
utf8mb3_test_ci utf8mb3 353 8
482+
utf8mb3_5624_1 utf8mb3 354 8
483+
utf8mb3_5624_2 utf8mb3 355 8
484+
utf8mb3_5624_3 utf8mb3 356 8
485+
utf8mb3_5624_4 utf8mb3 357 8
486+
ucs2_test_ci ucs2 358 8
487+
ucs2_vn_ci ucs2 359 8
488+
ucs2_5624_1 ucs2 360 8
489+
utf8mb3_5624_5 utf8mb3 368 8
490+
utf8mb3_5624_5_bad utf8mb3 369 8
491+
utf8mb3_czech_test_w2 utf8mb3 370 4
492+
utf8mb3_czech_test_nopad_w2 utf8mb3 371 4
493+
utf8mb3_czech_test_bad_w2 utf8mb3 372 4
494+
utf32_test_ci utf32 391 8
495+
utf8mb3_maxuserid_ci utf8mb3 2047 8
496496
show collation like '%test%';
497497
Collation Charset Id Default Compiled Sortlen
498498
latin1_test latin1 331 1

0 commit comments

Comments
 (0)