@@ -321,3 +321,189 @@ def test_table_segment(self):
321
321
self .assertRaises (ValueError , self .table .replace , min_update = late , max_update = early )
322
322
323
323
self .assertRaises (ValueError , self .table .replace , min_key = 10 , max_key = 0 )
324
+
325
+
326
class TestTableUUID(TestWithConnection):
    """Diff two tables keyed by UUID strings when the source holds one extra row whose value is NULL."""

    def setUp(self):
        """Build the fixture tables.

        The source table gets ten rows whose ``id`` and ``comment`` are the same
        UUID string, the destination table is created as a copy of it, and then
        one more row with a NULL ``comment`` is added to the source only.
        """
        super().setUp()

        # NOTE(review): table_src / table_dst / connection are presumably set up
        # by TestWithConnection.setUp() -- confirm against the base class.
        queries = [
            f"DROP TABLE IF EXISTS {self.table_src}",
            f"DROP TABLE IF EXISTS {self.table_dst}",
            f"CREATE TABLE {self.table_src} (id varchar(100), comment varchar(1000))",
        ]
        for i in range(10):
            uuid_value = uuid.uuid1(i)
            # No whitespace inside the SQL quotes: the stored value must be
            # exactly str(uuid_value), otherwise the keys gain a trailing space.
            queries.append(f"INSERT INTO {self.table_src} VALUES ('{uuid_value}', '{uuid_value}')")

        self.null_uuid = uuid.uuid1(32132131)
        queries += [
            # Copy first, so the NULL row below exists only in the source table.
            f"CREATE TABLE {self.table_dst} AS SELECT * FROM {self.table_src}",
            f"INSERT INTO {self.table_src} VALUES ('{self.null_uuid}', NULL)",
            "COMMIT",
        ]

        for query in queries:
            self.connection.query(query, None)

        self.a = TableSegment(self.connection, (self.table_src,), "id", "comment")
        self.b = TableSegment(self.connection, (self.table_dst,), "id", "comment")

    def test_uuid_column_with_nulls(self):
        """The only reported difference is the source-only row with the NULL comment."""
        differ = TableDiffer()
        diff = list(differ.diff_tables(self.a, self.b))
        self.assertEqual(diff, [("-", (str(self.null_uuid), None))])
358
+
359
+
360
class TestTableNullRowChecksum(TestWithConnection):
    """Check that a segment consisting of a single row with a NULL column is still reported."""

    def setUp(self):
        """Build the fixture tables.

        Both tables share one ordinary row; the source table additionally has a
        row keyed by ``self.null_uuid`` whose ``comment`` is NULL.
        """
        super().setUp()

        self.null_uuid = uuid.uuid1(1)
        queries = [
            f"DROP TABLE IF EXISTS {self.table_src}",
            f"DROP TABLE IF EXISTS {self.table_dst}",
            f"CREATE TABLE {self.table_src} (id varchar(100), comment varchar(1000))",
            # No whitespace inside the SQL quotes, so the stored key is exactly
            # the UUID string.
            f"INSERT INTO {self.table_src} VALUES ('{uuid.uuid1(1)}', '1')",
            # Copy before adding the NULL row, so that row exists only in the source.
            f"CREATE TABLE {self.table_dst} AS SELECT * FROM {self.table_src}",
            # Add a row where a column has a NULL value.
            f"INSERT INTO {self.table_src} VALUES ('{self.null_uuid}', NULL)",
            "COMMIT",
        ]

        for query in queries:
            self.connection.query(query, None)

        self.a = TableSegment(self.connection, (self.table_src,), "id", "comment")
        self.b = TableSegment(self.connection, (self.table_dst,), "id", "comment")

    def test_uuid_columns_with_nulls(self):
        """
        Test the case where, within one segment, one or more columns contain only NULL values.

        For example,
        Table A:
            | id   | value     |
            |------|-----------|
            | pk_1 | 'value_1' |
            | pk_2 | NULL      |

        Table B:
            | id   | value     |
            |------|-----------|
            | pk_1 | 'value_1' |

        We can choose a bisection factor and bisection threshold (2 and 3 in this test, respectively)
        such that one segment looks like ('pk_2', NULL). When these values are cast to strings and
        concatenated, some databases (for example, MySQL) return NULL for the whole concatenation.
        As a result, all subsequent operations (substring, sum, etc.) also return NULL, which leads to
        incorrect diff results: ('pk_2', NULL) should be in the diff, but it is not. This test detects
        such cases.
        """

        differ = TableDiffer(bisection_factor=2, bisection_threshold=3)
        diff = list(differ.diff_tables(self.a, self.b))
        self.assertEqual(diff, [("-", (str(self.null_uuid), None))])
410
+
411
+
412
class TestConcatMultipleColumnWithNulls(TestWithConnection):
    """Check that rows differing in one column are detected when another column is NULL in every row."""

    def setUp(self):
        """Build the fixture tables and the expected diff.

        Eight rows are inserted into each table with the same ``id``, a different
        ``c1`` value, and a NULL ``c2``. ``self.diffs`` records the expected
        "-" / "+" pair for every row, in insertion order.
        """
        super().setUp()

        queries = [
            f"DROP TABLE IF EXISTS {self.table_src}",
            f"DROP TABLE IF EXISTS {self.table_dst}",
            f"CREATE TABLE {self.table_src} (id varchar(100), c1 varchar(100), c2 varchar(100))",
            f"CREATE TABLE {self.table_dst} (id varchar(100), c1 varchar(100), c2 varchar(100))",
        ]

        self.diffs = []
        for i in range(8):
            pk = uuid.uuid1(i)
            table_src_c1_val = str(i)
            table_dst_c1_val = str(i) + "-different"

            # No whitespace inside the SQL quotes: the stored values must equal
            # the strings recorded in self.diffs below.
            queries.append(f"INSERT INTO {self.table_src} VALUES ('{pk}', '{table_src_c1_val}', NULL)")
            queries.append(f"INSERT INTO {self.table_dst} VALUES ('{pk}', '{table_dst_c1_val}', NULL)")

            self.diffs.append(("-", (str(pk), table_src_c1_val, None)))
            self.diffs.append(("+", (str(pk), table_dst_c1_val, None)))

        queries.append("COMMIT")

        for query in queries:
            self.connection.query(query, None)

        self.a = TableSegment(self.connection, (self.table_src,), "id", extra_columns=("c1", "c2"))
        self.b = TableSegment(self.connection, (self.table_dst,), "id", extra_columns=("c1", "c2"))

    def test_tables_are_different(self):
        """
        Test the case where one column contains only NULL values in both tables. For example,
        Table A:
            | id   | c1 | c2   |
            |------|----|------|
            | pk_1 | 1  | NULL |
            | pk_2 | 2  | NULL |
            ...
            | pk_n | n  | NULL |

        Table B:
            | id   | c1     | c2   |
            |------|--------|------|
            | pk_1 | 1-diff | NULL |
            | pk_2 | 2-diff | NULL |
            ...
            | pk_n | n-diff | NULL |

        To calculate a checksum, the string values of each row are concatenated. If both tables have a
        column with NULL values, this may result in concat(pk_i, i, NULL) == concat(pk_i, i-diff, NULL),
        hiding the difference in c1. This test covers such cases.
        """

        differ = TableDiffer(bisection_factor=2, bisection_threshold=4)
        diff = list(differ.diff_tables(self.a, self.b))
        self.assertEqual(diff, self.diffs)
469
+
470
+
471
class TestTableTableEmpty(TestWithConnection):
    """Check that diffing raises ValueError when either of the two tables is empty."""

    def setUp(self):
        """Create both tables and fill only the source table with 100 rows.

        ``self.diffs`` holds the ``(uuid, value)`` pairs inserted into the source.
        """
        super().setUp()

        # NOTE(review): not referenced by any test in this class; kept so the
        # attribute remains available to anything that may rely on it.
        self.null_uuid = uuid.uuid1(1)
        queries = [
            f"DROP TABLE IF EXISTS {self.table_src}",
            f"DROP TABLE IF EXISTS {self.table_dst}",
            f"CREATE TABLE {self.table_src} (id varchar(100), comment varchar(1000))",
            f"CREATE TABLE {self.table_dst} (id varchar(100), comment varchar(1000))",
        ]

        self.diffs = [(uuid.uuid1(i), i) for i in range(100)]
        # No whitespace inside the SQL quotes, so the stored rows match the
        # pairs recorded in self.diffs.
        queries.extend(
            f"INSERT INTO {self.table_src} VALUES ('{pk}', '{value}')"
            for pk, value in self.diffs
        )

        queries.append("COMMIT")

        for query in queries:
            self.connection.query(query, None)

        self.a = TableSegment(self.connection, (self.table_src,), "id", "comment")
        self.b = TableSegment(self.connection, (self.table_dst,), "id", "comment")

    def test_right_table_empty(self):
        """The destination table is empty after setUp, so diffing must raise ValueError."""
        differ = TableDiffer()
        self.assertRaises(ValueError, differ.diff_tables, self.a, self.b)

    def test_left_table_empty(self):
        """Move all rows to the destination and empty the source; diffing must raise ValueError."""
        queries = [
            f"INSERT INTO {self.table_dst} SELECT id, comment FROM {self.table_src}",
            f"TRUNCATE {self.table_src}",
            "COMMIT",
        ]
        for query in queries:
            self.connection.query(query, None)

        differ = TableDiffer()
        self.assertRaises(ValueError, differ.diff_tables, self.a, self.b)
0 commit comments