From 45cb4626a62e99bb96b34fa4862850c52b6301b5 Mon Sep 17 00:00:00 2001 From: Morgan Tocker Date: Sat, 15 Feb 2025 09:59:59 -0700 Subject: [PATCH 1/3] Fix converting latin1 to utf8 charset checksum failures --- pkg/migration/migration_test.go | 41 +++++++++++++++++++++++++++++++++ pkg/table/tableinfo_test.go | 2 +- pkg/table/utils.go | 7 +++--- pkg/table/utils_test.go | 19 +++++++-------- 4 files changed, 56 insertions(+), 13 deletions(-) diff --git a/pkg/migration/migration_test.go b/pkg/migration/migration_test.go index 5ba3751..05cc01e 100644 --- a/pkg/migration/migration_test.go +++ b/pkg/migration/migration_test.go @@ -291,3 +291,44 @@ func TestBinaryChecksum(t *testing.T) { assert.NoError(t, err) } } + +// TestConvertCharset tests that we can change the character set from latin1 to utf8mb4, +// and that the non 7-bit characters that can be represented in latin1 as 1 byte, +// checksum correctly against their multi-byte utf8mb4 representations +func TestConvertCharset(t *testing.T) { + testutils.RunSQL(t, `DROP TABLE IF EXISTS t1charset, _t1charset_new`) + table := `CREATE TABLE t1charset ( + id int not null primary key auto_increment, + b varchar(100) not null + ) charset=latin1` + testutils.RunSQL(t, table) + testutils.RunSQL(t, `insert into t1charset values (null, 'à'), (null, '€')`) + migration := &Migration{} + cfg, err := mysql.ParseDSN(testutils.DSN()) + assert.NoError(t, err) + migration.Host = cfg.Addr + migration.Username = cfg.User + migration.Password = cfg.Passwd + migration.Database = cfg.DBName + migration.Threads = 1 + migration.Checksum = true + migration.Table = "t1charset" + migration.Alter = "CONVERT TO CHARACTER SET UTF8MB4" + err = migration.Run() + assert.NoError(t, err) + + // Because utf8mb4 is the superset, it doesn't matter that thats + // what the checksum casts to. We should be able to convert back as well. + migration = &Migration{ + Host: cfg.Addr, + Username: cfg.User, + Password: cfg.Passwd, + Database: cfg.DBName, + Threads: 1, + Checksum: true, + Table: "t1charset", + Alter: "CONVERT TO CHARACTER SET latin1", + } + err = migration.Run() + assert.NoError(t, err) +} diff --git a/pkg/table/tableinfo_test.go b/pkg/table/tableinfo_test.go index 2b8c38b..6d2e423 100644 --- a/pkg/table/tableinfo_test.go +++ b/pkg/table/tableinfo_test.go @@ -87,7 +87,7 @@ func TestDiscovery(t *testing.T) { // normalize for mysql 5.7 and 8.0 assert.Equal(t, "int", removeWidth(t1.columnsMySQLTps["id"])) assert.Equal(t, "CAST(`id` AS signed)", t1.WrapCastType("id")) - assert.Equal(t, "CAST(`name` AS char)", t1.WrapCastType("name")) + assert.Equal(t, "CAST(`name` AS char CHARACTER SET utf8mb4)", t1.WrapCastType("name")) assert.Equal(t, "1", t1.minValue.String()) assert.Equal(t, "3", t1.maxValue.String()) diff --git a/pkg/table/utils.go b/pkg/table/utils.go index 1d7e0ae..c084ec6 100644 --- a/pkg/table/utils.go +++ b/pkg/table/utils.go @@ -34,8 +34,6 @@ func castableTp(tp string) string { return "unsigned" case "timestamp", "datetime": return "datetime" - case "varchar", "enum", "set", "text", "mediumtext", "longtext": - return "char" case "tinyblob", "blob", "mediumblob", "longblob", "varbinary": return "binary" case "binary": @@ -47,7 +45,10 @@ func castableTp(tp string) string { case "decimal": return tp default: - return "char" // default to char because it ends up being a char string in the concat anyway. + // For cases like varchar, enum, set, text, mediumtext, longtext + // We return char, but because the new table could also change charset we explicitly + // convert to utf8mb4 which should be the superset, and can do all comparisons. + return "char CHARACTER SET utf8mb4" } } diff --git a/pkg/table/utils_test.go b/pkg/table/utils_test.go index 0d43168..d55026d 100644 --- a/pkg/table/utils_test.go +++ b/pkg/table/utils_test.go @@ -1,6 +1,7 @@ package table import ( + "fmt" "testing" "time" @@ -42,20 +43,20 @@ func TestCastableTp(t *testing.T) { {"bigint unsigned", "unsigned"}, {"timestamp", "datetime"}, {"timestamp(6)", "datetime"}, - {"varchar(100)", "char"}, - {"text", "char"}, - {"mediumtext", "char"}, - {"longtext", "char"}, + {"varchar(100)", "char CHARACTER SET utf8mb4"}, + {"text", "char CHARACTER SET utf8mb4"}, + {"mediumtext", "char CHARACTER SET utf8mb4"}, + {"longtext", "char CHARACTER SET utf8mb4"}, {"tinyblob", "binary"}, {"blob", "binary"}, {"mediumblob", "binary"}, {"longblob", "binary"}, {"varbinary", "binary"}, - {"char(100)", "char"}, + {"char(100)", "char CHARACTER SET utf8mb4"}, {"binary(100)", "binary(0)"}, {"datetime", "datetime"}, {"datetime(6)", "datetime"}, - {"year", "char"}, + {"year", "char CHARACTER SET utf8mb4"}, {"float", "char"}, {"double", "char"}, {"json", "json"}, @@ -63,12 +64,12 @@ func TestCastableTp(t *testing.T) { {"int(11) unsigned", "unsigned"}, {"int(11) zerofill", "signed"}, {"int(11) unsigned zerofill", "unsigned"}, - {"enum('a', 'b', 'c')", "char"}, - {"set('a', 'b', 'c')", "char"}, + {"enum('a', 'b', 'c')", "char CHARACTER SET utf8mb4"}, + {"set('a', 'b', 'c')", "char CHARACTER SET utf8mb4"}, {"decimal(6,2)", "decimal(6,2)"}, } for _, tp := range tps { - assert.Equal(t, tp.expected, castableTp(tp.tp)) + assert.Equal(t, tp.expected, castableTp(tp.tp), fmt.Sprintf("tp failed: %s, expected: %s\n", tp.tp, tp.expected)) } } From 6674bb75e883d3a5eea0d6de1b21ef6478b86a7a Mon Sep 17 00:00:00 2001 From: Morgan Tocker Date: Sat, 15 Feb 2025 10:08:34 -0700 Subject: [PATCH 2/3] fix linter --- .golangci.yaml | 4 ---- pkg/dbconn/sqlescape/utils_test.go | 8 ++++---- pkg/table/chunker_composite.go | 2 +- pkg/table/chunker_optimistic_test.go | 2 +- pkg/table/utils_test.go | 3 +-- 5 files changed, 7 insertions(+), 12 deletions(-) diff --git a/.golangci.yaml b/.golangci.yaml index c4fed25..3e020c3 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -53,8 +53,6 @@ linters: - exportloopref linters-settings: - govet: - check-shadowing: false dupl: threshold: 100 gocyclo: @@ -69,8 +67,6 @@ linters-settings: - require-error issues: - max-per-linter: 0 - max-same: 0 exclude-use-default: false exclude: - '^(G104|G204):' diff --git a/pkg/dbconn/sqlescape/utils_test.go b/pkg/dbconn/sqlescape/utils_test.go index f8d15c6..754b4a9 100644 --- a/pkg/dbconn/sqlescape/utils_test.go +++ b/pkg/dbconn/sqlescape/utils_test.go @@ -464,25 +464,25 @@ func TestEscapeString(t *testing.T) { } func BenchmarkEscapeString(b *testing.B) { - for i := 0; i < b.N; i++ { + for range b.N { escapeSQL("select %?", "3") //nolint:errcheck } } func BenchmarkUnderlyingString(b *testing.B) { - for i := 0; i < b.N; i++ { + for range b.N { escapeSQL("select %?", myStr("3")) //nolint:errcheck } } func BenchmarkEscapeInt(b *testing.B) { - for i := 0; i < b.N; i++ { + for range b.N { escapeSQL("select %?", 3) //nolint:errcheck } } func BenchmarkUnderlyingInt(b *testing.B) { - for i := 0; i < b.N; i++ { + for range b.N { escapeSQL("select %?", myInt(3)) //nolint:errcheck } } diff --git a/pkg/table/chunker_composite.go b/pkg/table/chunker_composite.go index 413eb91..c0fdef7 100644 --- a/pkg/table/chunker_composite.go +++ b/pkg/table/chunker_composite.go @@ -5,12 +5,12 @@ import ( "errors" "fmt" "reflect" + "slices" "strings" "sync" "time" "github.com/siddontang/loggers" - "golang.org/x/exp/slices" ) type chunkerComposite struct { diff --git a/pkg/table/chunker_optimistic_test.go b/pkg/table/chunker_optimistic_test.go index 5413728..6a7e83a 100644 --- a/pkg/table/chunker_optimistic_test.go +++ b/pkg/table/chunker_optimistic_test.go @@ -58,7 +58,7 @@ func TestOptimisticChunkerBasic(t *testing.T) { assert.False(t, chunker.KeyAboveHighWatermark(100)) // we are at 1001 - for i := 0; i <= 998; i++ { + for range 999 { _, err = chunker.Next() assert.NoError(t, err) } diff --git a/pkg/table/utils_test.go b/pkg/table/utils_test.go index d55026d..a87ecf2 100644 --- a/pkg/table/utils_test.go +++ b/pkg/table/utils_test.go @@ -1,7 +1,6 @@ package table import ( - "fmt" "testing" "time" @@ -69,7 +68,7 @@ func TestCastableTp(t *testing.T) { {"decimal(6,2)", "decimal(6,2)"}, } for _, tp := range tps { - assert.Equal(t, tp.expected, castableTp(tp.tp), fmt.Sprintf("tp failed: %s, expected: %s\n", tp.tp, tp.expected)) + assert.Equal(t, tp.expected, castableTp(tp.tp), "tp failed: %s, expected: %s", tp.tp, tp.expected) } } From ba4be072d3bd9f333e10f6e25f5eb36235373990 Mon Sep 17 00:00:00 2001 From: Morgan Tocker Date: Sat, 15 Feb 2025 10:13:06 -0700 Subject: [PATCH 3/3] Update pkg/migration/migration_test.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pkg/migration/migration_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/migration/migration_test.go b/pkg/migration/migration_test.go index 05cc01e..dfa548d 100644 --- a/pkg/migration/migration_test.go +++ b/pkg/migration/migration_test.go @@ -317,7 +317,7 @@ func TestConvertCharset(t *testing.T) { err = migration.Run() assert.NoError(t, err) - // Because utf8mb4 is the superset, it doesn't matter that thats + // Because utf8mb4 is the superset, it doesn't matter that that's // what the checksum casts to. We should be able to convert back as well. migration = &Migration{ Host: cfg.Addr,